Mercurial > repos > iuc > limma_voom

diff limma_voom.R @ 3:38aab66ae5cb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 1640914b9812b0482a3cf684f05465f8d9cfdc65
author: iuc
date: Wed, 31 Jan 2018 12:45:42 -0500
parents: a330ddf43861
children: a61a6e62e91f
--- a/limma_voom.R	Thu Sep 07 05:27:27 2017 -0400
+++ b/limma_voom.R	Wed Jan 31 12:45:42 2018 -0500
@@ -2,31 +2,40 @@
 # outputs a table of top expressions as well as various plots for differential
 # expression analysis
 #
-# ARGS: 1.countPath       -Path to RData input containing counts
-#       2.annoPath        -Path to input containing gene annotations
-#       3.htmlPath        -Path to html file linking to other outputs
-#       4.outPath         -Path to folder to write all output to
-#       5.rdaOpt          -String specifying if RData should be saved
-#       6.normOpt         -String specifying type of normalisation used
-#       7.weightOpt       -String specifying usage of weights
-#       8.contrastData    -String containing contrasts of interest
-#       9.cpmReq          -Float specifying cpm requirement
-#       10.sampleReq      -Integer specifying cpm requirement
-#       11.pAdjOpt        -String specifying the p-value adjustment method
-#       12.pValReq        -Float specifying the p-value requirement
-#       13.lfcReq         -Float specifying the log-fold-change requirement
-#       14.normCounts     -String specifying if normalised counts should be output
-#       15.factPath       -Path to factor information file
-#       16.factorData     -Strings containing factor names and values if manually input 
+# ARGS: htmlPath", "R", 1, "character"      -Path to html file linking to other outputs
+#       outPath", "o", 1, "character"       -Path to folder to write all output to
+#       filesPath", "j", 2, "character"     -JSON list object if multiple files input
+#       matrixPath", "m", 2, "character"    -Path to count matrix
+#       factFile", "f", 2, "character"      -Path to factor information file
+#       factInput", "i", 2, "character"     -String containing factors if manually input
+#       annoPath", "a", 2, "character"      -Path to input containing gene annotations
+#       contrastData", "C", 1, "character"  -String containing contrasts of interest
+#       cpmReq", "c", 2, "double"           -Float specifying cpm requirement
+#       cntReq", "z", 2, "integer"          -Integer specifying minimum total count requirement
+#       sampleReq", "s", 2, "integer"       -Integer specifying cpm requirement
+#       normCounts", "x", 0, "logical"      -String specifying if normalised counts should be output
+#       rdaOpt", "r", 0, "logical"          -String specifying if RData should be output
+#       lfcReq", "l", 1, "double"           -Float specifying the log-fold-change requirement
+#       pValReq", "p", 1, "double"          -Float specifying the p-value requirement
+#       pAdjOpt", "d", 1, "character"       -String specifying the p-value adjustment method
+#       normOpt", "n", 1, "character"       -String specifying type of normalisation used
+#       robOpt", "b", 0, "logical"          -String specifying if robust options should be used
+#       trend", "t", 1, "double"            -Float for prior.count if limma-trend is used instead of voom
+#       weightOpt", "w", 0, "logical"       -String specifying if voomWithQualityWeights should be used
 #
-# OUT:  Voom Plot
-#       BCV Plot
-#       MA Plot
+# OUT:
+#       MDS Plot
+#       Voom/SA plot
+#       MD Plot
 #       Expression Table
 #       HTML file linking to the ouputs
+# Optional:
+#       Normalised counts Table
+#       RData file
+#
 #
 # Author: Shian Su - registertonysu@gmail.com - Jan 2014
-# Modified by: Maria Doyle - Jun 2017
+# Modified by: Maria Doyle - Jun 2017, Jan 2018
 
 # Record starting time
 timeStart <- as.character(Sys.time())
@@ -38,9 +47,10 @@
 library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
 library(limma, quietly=TRUE, warn.conflicts=FALSE)
 library(scales, quietly=TRUE, warn.conflicts=FALSE)
+library(getopt, quietly=TRUE, warn.conflicts=FALSE)
 
 if (packageVersion("limma") < "3.20.1") {
-  stop("Please update 'limma' to version >= 3.20.1 to run this tool")
+    stop("Please update 'limma' to version >= 3.20.1 to run this tool")
 }
 
 ################################################################################
@@ -49,185 +59,276 @@
 # Function to sanitise contrast equations so there are no whitespaces
 # surrounding the arithmetic operators, leading or trailing whitespace
 sanitiseEquation <- function(equation) {
-  equation <- gsub(" *[+] *", "+", equation)
-  equation <- gsub(" *[-] *", "-", equation)
-  equation <- gsub(" *[/] *", "/", equation)
-  equation <- gsub(" *[*] *", "*", equation)
-  equation <- gsub("^\\s+|\\s+$", "", equation)
-  return(equation)
+    equation <- gsub(" *[+] *", "+", equation)
+    equation <- gsub(" *[-] *", "-", equation)
+    equation <- gsub(" *[/] *", "/", equation)
+    equation <- gsub(" *[*] *", "*", equation)
+    equation <- gsub("^\\s+|\\s+$", "", equation)
+    return(equation)
 }
 
 # Function to sanitise group information
 sanitiseGroups <- function(string) {
-  string <- gsub(" *[,] *", ",", string)
-  string <- gsub("^\\s+|\\s+$", "", string)
-  return(string)
+    string <- gsub(" *[,] *", ",", string)
+    string <- gsub("^\\s+|\\s+$", "", string)
+    return(string)
 }
 
 # Function to change periods to whitespace in a string
 unmake.names <- function(string) {
-  string <- gsub(".", " ", string, fixed=TRUE)
-  return(string)
+    string <- gsub(".", " ", string, fixed=TRUE)
+    return(string)
 }
 
 # Generate output folder and paths
 makeOut <- function(filename) {
-  return(paste0(outPath, "/", filename))
+    return(paste0(opt$outPath, "/", filename))
 }
 
 # Generating design information
 pasteListName <- function(string) {
-  return(paste0("factors$", string))
+    return(paste0("factors$", string))
 }
 
 # Create cata function: default path set, default seperator empty and appending
 # true by default (Ripped straight from the cat function with altered argument
 # defaults)
-cata <- function(..., file = htmlPath, sep = "", fill = FALSE, labels = NULL, 
-                 append = TRUE) {
-  if (is.character(file)) 
-    if (file == "") 
-      file <- stdout()
-  else if (substring(file, 1L, 1L) == "|") {
-    file <- pipe(substring(file, 2L), "w")
-    on.exit(close(file))
-  }
-  else {
-    file <- file(file, ifelse(append, "a", "w"))
-    on.exit(close(file))
-  }
-  .Internal(cat(list(...), file, sep, fill, labels, append))
+cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL,
+               append = TRUE) {
+    if (is.character(file))
+        if (file == "")
+            file <- stdout()
+        else if (substring(file, 1L, 1L) == "|") {
+            file <- pipe(substring(file, 2L), "w")
+            on.exit(close(file))
+        }
+        else {
+        file <- file(file, ifelse(append, "a", "w"))
+      on.exit(close(file))
+        }
+    .Internal(cat(list(...), file, sep, fill, labels, append))
 }
 
 # Function to write code for html head and title
 HtmlHead <- function(title) {
-  cata("<head>\n")
-  cata("<title>", title, "</title>\n")
-  cata("</head>\n")
+    cata("<head>\n")
+    cata("<title>", title, "</title>\n")
+    cata("</head>\n")
 }
 
 # Function to write code for html links
 HtmlLink <- function(address, label=address) {
-  cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
+    cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
 }
 
 # Function to write code for html images
 HtmlImage <- function(source, label=source, height=600, width=600) {
-  cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
-  cata("\" width=\"", width, "\"/>\n")
+    cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
+    cata("\" width=\"", width, "\"/>\n")
 }
 
 # Function to write code for html list items
 ListItem <- function(...) {
-  cata("<li>", ..., "</li>\n")
+    cata("<li>", ..., "</li>\n")
 }
 
 TableItem <- function(...) {
-  cata("<td>", ..., "</td>\n")
+    cata("<td>", ..., "</td>\n")
 }
 
 TableHeadItem <- function(...) {
-  cata("<th>", ..., "</th>\n")
+    cata("<th>", ..., "</th>\n")
 }
 
 ################################################################################
 ### Input Processing
 ################################################################################
 
-# Collects arguments from command line
-argv <- commandArgs(TRUE)
+# Collect arguments from command line
+args <- commandArgs(trailingOnly=TRUE)
 
-# Grab arguments
-countPath <- as.character(argv[1])
-annoPath <- as.character(argv[2])
-htmlPath <- as.character(argv[3])
-outPath <- as.character(argv[4])
-rdaOpt <- as.character(argv[5])
-normOpt <- as.character(argv[6])
-weightOpt <- as.character(argv[7])
-contrastData <- as.character(argv[8])
-cpmReq <- as.numeric(argv[9])
-sampleReq <- as.numeric(argv[10])
-pAdjOpt <- as.character(argv[11])
-pValReq <- as.numeric(argv[12])
-lfcReq <- as.numeric(argv[13])
-normCounts <- as.character(argv[14])
-factPath <- as.character(argv[15])
-# Process factors
-if (as.character(argv[16])=="None") {
-    factorData <- read.table(factPath, header=TRUE, sep="\t")
-    factors <- factorData[,-1, drop=FALSE]
-}  else { 
-    factorData <- list()
-    for (i in 16:length(argv)) {
-        newFact <- unlist(strsplit(as.character(argv[i]), split="::"))
-        factorData <- rbind(factorData, newFact)
-    } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
+# Get options, using the spec as defined by the enclosed list.
+# Read the options from the default: commandArgs(TRUE).
+spec <- matrix(c(
+    "htmlPath", "R", 1, "character",
+    "outPath", "o", 1, "character",
+    "filesPath", "j", 2, "character",
+    "matrixPath", "m", 2, "character",
+    "factFile", "f", 2, "character",
+    "factInput", "i", 2, "character",
+    "annoPath", "a", 2, "character",
+    "contrastData", "C", 1, "character",
+    "cpmReq", "c", 1, "double",
+    "totReq", "y", 0, "logical",
+    "cntReq", "z", 1, "integer",
+    "sampleReq", "s", 1, "integer",
+    "normCounts", "x", 0, "logical",
+    "rdaOpt", "r", 0, "logical",
+    "lfcReq", "l", 1, "double",
+    "pValReq", "p", 1, "double",
+    "pAdjOpt", "d", 1, "character",
+    "normOpt", "n", 1, "character",
+    "robOpt", "b", 0, "logical",
+    "trend", "t", 1, "double",
+    "weightOpt", "w", 0, "logical"),
+    byrow=TRUE, ncol=4)
+opt <- getopt(spec)
 
-    # Set the row names to be the name of the factor and delete first row
-    row.names(factorData) <- factorData[, 1]
-    factorData <- factorData[, -1]
-    factorData <- sapply(factorData, sanitiseGroups)
-    factorData <- sapply(factorData, strsplit, split=",")
-    factorData <- sapply(factorData, make.names)
-    # Transform factor data into data frame of R factor objects
-    factors <- data.frame(factorData)
 
+if (is.null(opt$matrixPath) & is.null(opt$filesPath)) {
+    cat("A counts matrix (or a set of counts files) is required.\n")
+    q(status=1)
+}
+
+if (is.null(opt$cpmReq)) {
+    filtCPM <- FALSE
+} else {
+    filtCPM <- TRUE
 }
 
-# Process other arguments
-if (weightOpt=="yes") {
-  wantWeight <- TRUE
+if (is.null(opt$cntReq) || is.null(opt$sampleReq)) {
+    filtSmpCount <- FALSE
+} else {
+    filtSmpCount <- TRUE
+}
+
+if (is.null(opt$totReq)) {
+    filtTotCount <- FALSE
 } else {
-  wantWeight <- FALSE
+    filtTotCount <- TRUE
+}
+
+if (is.null(opt$rdaOpt)) {
+    wantRda <- FALSE
+} else {
+    wantRda <- TRUE
+}
+
+if (is.null(opt$annoPath)) {
+    haveAnno <- FALSE
+} else {
+    haveAnno <- TRUE
 }
 
-if (rdaOpt=="yes") {
-  wantRda <- TRUE
+if (is.null(opt$normCounts)) {
+    wantNorm <- FALSE
 } else {
-  wantRda <- FALSE
+    wantNorm <- TRUE
+}
+
+if (is.null(opt$robOpt)) {
+    wantRobust <- FALSE
+} else {
+    wantRobust <- TRUE
 }
 
-if (annoPath=="None") {
-  haveAnno <- FALSE
+if (is.null(opt$weightOpt)) {
+    wantWeight <- FALSE
 } else {
-  haveAnno <- TRUE
+    wantWeight <- TRUE
 }
 
-if (normCounts=="yes") {
-  wantNorm <- TRUE
+if (is.null(opt$trend)) {
+    wantTrend <- FALSE
+    deMethod <- "limma-voom"
 } else {
-  wantNorm <- FALSE
+    wantTrend <- TRUE
+    deMethod <- "limma-trend"
+    priorCount <- opt$trend
 }
 
 
+if (!is.null(opt$filesPath)) {
+    # Process the separate count files (adapted from DESeq2 wrapper)
+    library("rjson")
+    parser <- newJSONParser()
+    parser$addData(opt$filesPath)
+    factorList <- parser$getObject()
+    factors <- sapply(factorList, function(x) x[[1]])
+    filenamesIn <- unname(unlist(factorList[[1]][[2]]))
+    sampleTable <- data.frame(sample=basename(filenamesIn),
+                            filename=filenamesIn,
+                            row.names=filenamesIn,
+                            stringsAsFactors=FALSE)
+    for (factor in factorList) {
+        factorName <- factor[[1]]
+        sampleTable[[factorName]] <- character(nrow(sampleTable))
+        lvls <- sapply(factor[[2]], function(x) names(x))
+        for (i in seq_along(factor[[2]])) {
+            files <- factor[[2]][[i]][[1]]
+            sampleTable[files,factorName] <- lvls[i]
+        }
+        sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
+    }
+    rownames(sampleTable) <- sampleTable$sample
+    rem <- c("sample","filename")
+    factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE]
+
+    #read in count files and create single table
+    countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)})
+    counts <- do.call("cbind", countfiles)
+
+} else {
+    # Process the single count matrix
+    counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
+    row.names(counts) <- counts[, 1]
+    counts <- counts[ , -1]
+    countsRows <- nrow(counts)
+
+    # Process factors
+    if (is.null(opt$factInput)) {
+            factorData <- read.table(opt$factFile, header=TRUE, sep="\t")
+            factors <- factorData[, -1, drop=FALSE]
+    }  else {
+            factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
+            factorData <- list()
+            for (fact in factors) {
+                newFact <- unlist(strsplit(fact, split="::"))
+                factorData <- rbind(factorData, newFact)
+            } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
+
+            # Set the row names to be the name of the factor and delete first row
+            row.names(factorData) <- factorData[, 1]
+            factorData <- factorData[, -1]
+            factorData <- sapply(factorData, sanitiseGroups)
+            factorData <- sapply(factorData, strsplit, split=",")
+            factorData <- sapply(factorData, make.names)
+            # Transform factor data into data frame of R factor objects
+            factors <- data.frame(factorData)
+    }
+}
+
+ # if annotation file provided
+if (haveAnno) {
+    geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
+}
+
 #Create output directory
-dir.create(outPath, showWarnings=FALSE)
+dir.create(opt$outPath, showWarnings=FALSE)
 
 # Split up contrasts seperated by comma into a vector then sanitise
-contrastData <- unlist(strsplit(contrastData, split=","))
+contrastData <- unlist(strsplit(opt$contrastData, split=","))
 contrastData <- sanitiseEquation(contrastData)
 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
 
-bcvOutPdf <- makeOut("bcvplot.pdf")
-bcvOutPng <- makeOut("bcvplot.png")
-mdsOutPdf <- makeOut("mdsplot.pdf")
-mdsOutPng <- makeOut("mdsplot.png")
-voomOutPdf <- makeOut("voomplot.pdf")
-voomOutPng <- makeOut("voomplot.png") 
+
+mdsOutPdf <- makeOut("mdsplot_nonorm.pdf")
+mdsOutPng <- makeOut("mdsplot_nonorm.png")
+nmdsOutPdf <- makeOut("mdsplot.pdf")
+nmdsOutPng <- makeOut("mdsplot.png")
 maOutPdf <- character()   # Initialise character vector
 maOutPng <- character()
 topOut <- character()
 for (i in 1:length(contrastData)) {
-  maOutPdf[i] <- makeOut(paste0("maplot_", contrastData[i], ".pdf"))
-  maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png"))
-  topOut[i] <- makeOut(paste0("limma-voom_", contrastData[i], ".tsv"))
-}                         # Save output paths for each contrast as vectors
-normOut <- makeOut("limma-voom_normcounts.tsv")
-rdaOut <- makeOut("RData.rda")
+    maOutPdf[i] <- makeOut(paste0("maplot_", contrastData[i], ".pdf"))
+    maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png"))
+    topOut[i] <- makeOut(paste0(deMethod, "_", contrastData[i], ".tsv"))
+}
+normOut <- makeOut(paste0(deMethod, "_normcounts.tsv"))
+rdaOut <- makeOut(paste0(deMethod, "_analysis.RData"))
 sessionOut <- makeOut("session_info.txt")
 
-# Initialise data for html links and images, data frame with columns Label and 
+# Initialise data for html links and images, data frame with columns Label and
 # Link
 linkData <- data.frame(Label=character(), Link=character(),
                        stringsAsFactors=FALSE)
@@ -238,21 +339,13 @@
 upCount <- numeric()
 downCount <- numeric()
 flatCount <- numeric()
-                        
-# Read in counts and geneanno data
-counts <- read.table(countPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
-row.names(counts) <- counts[, 1]
-counts <- counts[ , -1]
-countsRows <- nrow(counts)
-if (haveAnno) {
-  geneanno <- read.table(annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
-}
 
 ################################################################################
 ### Data Processing
 ################################################################################
 
 # Extract counts and annotation data
+print("Extracting counts")
 data <- list()
 data$counts <- counts
 if (haveAnno) {
@@ -261,12 +354,24 @@
   data$genes <- data.frame(GeneID=row.names(counts))
 }
 
-# Filter out genes that do not have a required cpm in a required number of
-# samples
+# If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
+# samples. Default is no filtering
 preFilterCount <- nrow(data$counts)
-sel <- rowSums(cpm(data$counts) > cpmReq) >= sampleReq
-data$counts <- data$counts[sel, ]
-data$genes <- data$genes[sel, ,drop = FALSE]
+
+if (filtCPM || filtSmpCount || filtTotCount) {
+
+    if (filtTotCount) {
+        keep <- rowSums(data$counts) >= opt$cntReq
+    } else if (filtSmpCount) {
+        keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
+    } else if (filtCPM) {
+        keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
+    }
+
+    data$counts <- data$counts[keep, ]
+    data$genes <- data$genes[keep, , drop=FALSE]
+}
+
 postFilterCount <- nrow(data$counts)
 filteredCount <- preFilterCount-postFilterCount
 
@@ -276,173 +381,231 @@
 
 
 # Generating the DGEList object "data"
+print("Generating DGEList object")
 data$samples <- sampleanno
 data$samples$lib.size <- colSums(data$counts)
 data$samples$norm.factors <- 1
 row.names(data$samples) <- colnames(data$counts)
 data <- new("DGEList", data)
 
+print("Generating Design")
+# Name rows of factors according to their sample
+row.names(factors) <- names(data$counts)
 factorList <- sapply(names(factors), pasteListName)
-
-formula <- "~0" 
+formula <- "~0"
 for (i in 1:length(factorList)) {
     formula <- paste(formula,factorList[i], sep="+")
 }
-
 formula <- formula(formula)
 design <- model.matrix(formula)
-
 for (i in 1:length(factorList)) {
     colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE)
 }
 
-# Calculating normalising factor, estimating dispersion
-data <- calcNormFactors(data, method=normOpt)
-#data <- estimateDisp(data, design=design, robust=TRUE)
+# Calculating normalising factors
+print("Calculating Normalisation Factors")
+data <- calcNormFactors(data, method=opt$normOpt)
 
 # Generate contrasts information
+print("Generating Contrasts")
 contrasts <- makeContrasts(contrasts=contrastData, levels=design)
 
-# Name rows of factors according to their sample
-row.names(factors) <- names(data$counts)
-
 ################################################################################
 ### Data Output
 ################################################################################
-
-# BCV Plot
-#png(bcvOutPng, width=600, height=600)
-#plotBCV(data, main="BCV Plot")
-#imageData[1, ] <- c("BCV Plot", "bcvplot.png")
-#invisible(dev.off())
-
-#pdf(bcvOutPdf)
-#plotBCV(data, main="BCV Plot")
-#invisible(dev.off())
-
-if (wantWeight) {
-  # Creating voom data object and plot
-  png(voomOutPng, width=1000, height=600)
-  vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
-  imageData[1, ] <- c("Voom Plot", "voomplot.png")
-  invisible(dev.off())
-  
-  pdf(voomOutPdf, width=14)
-  vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
-  linkData[1, ] <- c("Voom Plot (.pdf)", "voomplot.pdf")
-  invisible(dev.off())
-  
-  # Generating fit data and top table with weights
-  wts <- vData$weights
-  voomFit <- lmFit(vData, design, weights=wts)
-  
-} else {
-  # Creating voom data object and plot
-  png(voomOutPng, width=600, height=600)
-  vData <- voom(data, design=design, plot=TRUE)
-  imageData[1, ] <- c("Voom Plot", "voomplot.png")
-  invisible(dev.off())
-  
-  pdf(voomOutPdf)
-  vData <- voom(data, design=design, plot=TRUE)
-  linkData[1, ] <- c("Voom Plot (.pdf)", "voomplot.pdf")
-  invisible(dev.off())
-  
-  # Generate voom fit
-  voomFit <- lmFit(vData, design)
-  
-}
-
- # Save normalised counts (log2cpm)
-if (wantNorm) {   
-    norm_counts <- data.frame(vData$genes, vData$E)
-    write.table (norm_counts, file=normOut, row.names=FALSE, sep="\t")
-    linkData <- rbind(linkData, c("limma-voom_normcounts.tsv", "limma-voom_normcounts.tsv"))
-}
-
-# Fit linear model and estimate dispersion with eBayes
-voomFit <- contrasts.fit(voomFit, contrasts)
-voomFit <- eBayes(voomFit)
-
 # Plot MDS
+print("Generating MDS plot")
 labels <- names(counts)
 png(mdsOutPng, width=600, height=600)
 # Currently only using a single factor
-plotMDS(vData, labels=labels, col=as.numeric(factors[, 1]), cex=0.8)
-imgName <- "Voom Plot"
+plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (unnormalised)")
+imageData[1, ] <- c("MDS Plot (unnormalised)", "mdsplot_nonorm.png")
+invisible(dev.off())
+
+pdf(mdsOutPdf)
+plotMDS(data, labels=labels, cex=0.5)
+linkData[1, ] <- c("MDS Plot (unnormalised).pdf", "mdsplot_nonorm.pdf")
+invisible(dev.off())
+
+if (wantTrend) {
+    # limma-trend approach
+    logCPM <- cpm(data, log=TRUE, prior.count=opt$trend)
+    fit <- lmFit(logCPM, design)
+    fit <- contrasts.fit(fit, contrasts)
+    if (wantRobust) {
+        fit <- eBayes(fit, trend=TRUE, robust=TRUE)
+    } else {
+        fit <- eBayes(fit, trend=TRUE, robust=FALSE)
+    }
+    # plot fit with plotSA
+    saOutPng <- makeOut("saplot.png")
+    saOutPdf <- makeOut("saplot.pdf")
+
+    png(saOutPng, width=600, height=600)
+    plotSA(fit, main="SA Plot")
+    imgName <- "SA Plot.png"
+    imgAddr <- "saplot.png"
+    imageData <- rbind(imageData, c(imgName, imgAddr))
+    invisible(dev.off())
+
+    pdf(saOutPdf, width=14)
+    plotSA(fit, main="SA Plot")
+    linkName <- paste0("SA Plot.pdf")
+    linkAddr <- paste0("saplot.pdf")
+    linkData <- rbind(linkData, c(linkName, linkAddr))
+    invisible(dev.off())
+
+    plotData <- logCPM
+
+    # Save normalised counts (log2cpm)
+    if (wantNorm) {
+        write.table(logCPM, file=normOut, row.names=TRUE, sep="\t")
+        linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv"))))
+    }
+} else {
+    # limma-voom approach
+    voomOutPdf <- makeOut("voomplot.pdf")
+    voomOutPng <- makeOut("voomplot.png")
+
+    if (wantWeight) {
+        # Creating voom data object and plot
+        png(voomOutPng, width=1000, height=600)
+        vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
+        imgName <- "Voom Plot.png"
+        imgAddr <- "voomplot.png"
+        imageData <- rbind(imageData, c(imgName, imgAddr))
+        invisible(dev.off())
+
+        pdf(voomOutPdf, width=14)
+        vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
+        linkName <- paste0("Voom Plot.pdf")
+        linkAddr <- paste0("voomplot.pdf")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
+        invisible(dev.off())
+
+        # Generating fit data and top table with weights
+        wts <- vData$weights
+        voomFit <- lmFit(vData, design, weights=wts)
+
+    } else {
+        # Creating voom data object and plot
+        png(voomOutPng, width=600, height=600)
+        vData <- voom(data, design=design, plot=TRUE)
+        imgName <- "Voom Plot"
+        imgAddr <- "voomplot.png"
+        imageData <- rbind(imageData, c(imgName, imgAddr))
+        invisible(dev.off())
+
+        pdf(voomOutPdf)
+        vData <- voom(data, design=design, plot=TRUE)
+        linkName <- paste0("Voom Plot.pdf")
+        linkAddr <- paste0("voomplot.pdf")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
+        invisible(dev.off())
+
+        # Generate voom fit
+        voomFit <- lmFit(vData, design)
+    }
+
+     # Save normalised counts (log2cpm)
+    if (wantNorm) {
+        norm_counts <- data.frame(vData$genes, vData$E)
+        write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t")
+        linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv"))))
+    }
+
+    # Fit linear model and estimate dispersion with eBayes
+    voomFit <- contrasts.fit(voomFit, contrasts)
+    if (wantRobust) {
+        fit <- eBayes(voomFit, robust=TRUE)
+    } else {
+        fit <- eBayes(voomFit, robust=FALSE)
+    }
+    plotData <- vData
+}
+
+print("Generating normalised MDS plot")
+png(nmdsOutPng, width=600, height=600)
+# Currently only using a single factor
+plotMDS(plotData, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (normalised)")
+imgName <- "MDS Plot (normalised)"
 imgAddr <- "mdsplot.png"
 imageData <- rbind(imageData, c(imgName, imgAddr))
 invisible(dev.off())
 
-pdf(mdsOutPdf)
-plotMDS(vData, labels=labels, cex=0.5)
-linkName <- paste0("MDS Plot (.pdf)")
+pdf(nmdsOutPdf)
+plotMDS(plotData, labels=labels, cex=0.5)
+linkName <- paste0("MDS Plot (normalised).pdf")
 linkAddr <- paste0("mdsplot.pdf")
 linkData <- rbind(linkData, c(linkName, linkAddr))
 invisible(dev.off())
 
 
+print("Generating DE results")
+status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
+                       lfc=opt$lfcReq)
+sumStatus <- summary(status)
+
 for (i in 1:length(contrastData)) {
+    # Collect counts for differential expression
+    upCount[i] <- sumStatus["Up", i]
+    downCount[i] <- sumStatus["Down", i]
+    flatCount[i] <- sumStatus["NotSig", i]
+
+    # Write top expressions table
+    top <- topTable(fit, coef=i, number=Inf, sort.by="P")
+    if (wantTrend) {
+        write.table(top, file=topOut[i], row.names=TRUE, sep="\t")
+    } else {
+        write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
+    }
+
+    linkName <- paste0(deMethod, "_", contrastData[i], ".tsv")
+    linkAddr <- paste0(deMethod, "_", contrastData[i], ".tsv")
+    linkData <- rbind(linkData, c(linkName, linkAddr))
 
-  status = decideTests(voomFit[, i], adjust.method=pAdjOpt, p.value=pValReq,
-                       lfc=lfcReq)
-                       
-  sumStatus <- summary(status)
-  
-  # Collect counts for differential expression
-  upCount[i] <- sumStatus["1",]
-  downCount[i] <- sumStatus["-1",]
-  flatCount[i] <- sumStatus["0",]
-                       
-  # Write top expressions table
-  top <- topTable(voomFit, coef=i, number=Inf, sort.by="P")
-  write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
-  
-  linkName <- paste0("limma-voom_", contrastData[i], ".tsv")
-  linkAddr <- paste0("limma-voom_", contrastData[i], ".tsv")
-  linkData <- rbind(linkData, c(linkName, linkAddr))
-  
-  # Plot MA (log ratios vs mean average) using limma package on weighted 
-  pdf(maOutPdf[i])
-  limma::plotMA(voomFit, status=status, coef=i,
-                main=paste("MA Plot:", unmake.names(contrastData[i])), 
-                col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
-                xlab="Average Expression", ylab="logFC")
-  
-  abline(h=0, col="grey", lty=2)
-  
-  linkName <- paste0("MA Plot_", contrastData[i], " (.pdf)")
-  linkAddr <- paste0("maplot_", contrastData[i], ".pdf")
-  linkData <- rbind(linkData, c(linkName, linkAddr))
-  invisible(dev.off())
-  
-  png(maOutPng[i], height=600, width=600)
-  limma::plotMA(voomFit, status=status, coef=i,
-                main=paste("MA Plot:", unmake.names(contrastData[i])), 
-                col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
-                xlab="Average Expression", ylab="logFC")
-  
-  abline(h=0, col="grey", lty=2)
-  
-  imgName <- paste0("MA Plot_", contrastData[i])
-  imgAddr <- paste0("maplot_", contrastData[i], ".png")
-  imageData <- rbind(imageData, c(imgName, imgAddr))
-  invisible(dev.off())
+    # Plot MA (log ratios vs mean average) using limma package on weighted
+    pdf(maOutPdf[i])
+    limma::plotMD(fit, status=status, coef=i,
+                  main=paste("MA Plot:", unmake.names(contrastData[i])),
+                  col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+                  xlab="Average Expression", ylab="logFC")
+
+    abline(h=0, col="grey", lty=2)
+
+    linkName <- paste0("MA Plot_", contrastData[i], " (.pdf)")
+    linkAddr <- paste0("maplot_", contrastData[i], ".pdf")
+    linkData <- rbind(linkData, c(linkName, linkAddr))
+    invisible(dev.off())
+
+    png(maOutPng[i], height=600, width=600)
+    limma::plotMD(fit, status=status, coef=i,
+                  main=paste("MA Plot:", unmake.names(contrastData[i])),
+                  col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+                  xlab="Average Expression", ylab="logFC")
+
+    abline(h=0, col="grey", lty=2)
+
+    imgName <- paste0("MA Plot_", contrastData[i])
+    imgAddr <- paste0("maplot_", contrastData[i], ".png")
+    imageData <- rbind(imageData, c(imgName, imgAddr))
+    invisible(dev.off())
 }
 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
 row.names(sigDiff) <- contrastData
 
 # Save relevant items as rda object
 if (wantRda) {
-  if (wantWeight) {
-    save(data, status, vData, labels, factors, wts, voomFit, top, contrasts, 
-         design,
-         file=rdaOut, ascii=TRUE)
-  } else {
-    save(data, status, vData, labels, factors, voomFit, top, contrasts, design,
-         file=rdaOut, ascii=TRUE)
-  }
-  linkData <- rbind(linkData, c("RData (.rda)", "RData.rda"))
+    print("Saving RData")
+    if (wantWeight) {
+      save(data, status, plotData, labels, factors, wts, fit, top, contrasts,
+           design,
+           file=rdaOut, ascii=TRUE)
+    } else {
+      save(data, status, plotData, labels, factors, fit, top, contrasts, design,
+           file=rdaOut, ascii=TRUE)
+    }
+    linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData"))))
 }
 
 # Record session info
@@ -458,21 +621,21 @@
 ################################################################################
 
 # Clear file
-cat("", file=htmlPath)
+cat("", file=opt$htmlPath)
 
 cata("<html>\n")
 
 cata("<body>\n")
-cata("<h3>Limma-voom Analysis Output:</h3>\n")
+cata("<h3>Limma Analysis Output:</h3>\n")
 cata("PDF copies of JPEGS available in 'Plots' section.<br />\n")
 if (wantWeight) {
-  HtmlImage(imageData$Link[1], imageData$Label[1], width=1000)
+    HtmlImage(imageData$Link[1], imageData$Label[1], width=1000)
 } else {
-  HtmlImage(imageData$Link[1], imageData$Label[1])
+    HtmlImage(imageData$Link[1], imageData$Label[1])
 }
 
 for (i in 2:nrow(imageData)) {
-  HtmlImage(imageData$Link[i], imageData$Label[i])
+    HtmlImage(imageData$Link[i], imageData$Label[i])
 }
 
 cata("<h4>Differential Expression Counts:</h4>\n")
@@ -481,40 +644,40 @@
 cata("<tr>\n")
 TableItem()
 for (i in colnames(sigDiff)) {
-  TableHeadItem(i)
+    TableHeadItem(i)
 }
 cata("</tr>\n")
 for (i in 1:nrow(sigDiff)) {
-  cata("<tr>\n")
-  TableHeadItem(unmake.names(row.names(sigDiff)[i]))
-  for (j in 1:ncol(sigDiff)) {
-    TableItem(as.character(sigDiff[i, j]))
-  }
-  cata("</tr>\n")
+    cata("<tr>\n")
+    TableHeadItem(unmake.names(row.names(sigDiff)[i]))
+    for (j in 1:ncol(sigDiff)) {
+        TableItem(as.character(sigDiff[i, j]))
+    }
+    cata("</tr>\n")
 }
 cata("</table>")
 
 cata("<h4>Plots:</h4>\n")
 for (i in 1:nrow(linkData)) {
-  if (grepl(".pdf", linkData$Link[i])) {
-    HtmlLink(linkData$Link[i], linkData$Label[i])
+    if (grepl(".pdf", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
   }
 }
 
 cata("<h4>Tables:</h4>\n")
 for (i in 1:nrow(linkData)) {
-  if (grepl(".tsv", linkData$Link[i])) {
-    HtmlLink(linkData$Link[i], linkData$Label[i])
-  }
+    if (grepl(".tsv", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+    }
 }
 
 if (wantRda) {
-  cata("<h4>R Data Object:</h4>\n")
-  for (i in 1:nrow(linkData)) {
-    if (grepl(".rda", linkData$Link[i])) {
-      HtmlLink(linkData$Link[i], linkData$Label[i])
+    cata("<h4>R Data Object:</h4>\n")
+    for (i in 1:nrow(linkData)) {
+        if (grepl(".RData", linkData$Link[i])) {
+            HtmlLink(linkData$Link[i], linkData$Label[i])
+        }
     }
-  }
 }
 
 cata("<p>Alt-click links to download file.</p>\n")
@@ -524,40 +687,60 @@
 
 cata("<h4>Additional Information</h4>\n")
 cata("<ul>\n")
-if (cpmReq!=0 && sampleReq!=0) {
-  tempStr <- paste("Genes without more than", cpmReq,
-                   "CPM in at least", sampleReq, "samples are insignificant",
-                   "and filtered out.")
-  ListItem(tempStr)
-  filterProp <- round(filteredCount/preFilterCount*100, digits=2)
-  tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
-                   "%) genes were filtered out for low expression.")
-  ListItem(tempStr)
-}
-ListItem(normOpt, " was the method used to normalise library sizes.")
-if (wantWeight) {
-  ListItem("Weights were applied to samples.")
-} else {
-  ListItem("Weights were not applied to samples.")
+
+if (filtCPM || filtSmpCount || filtTotCount) {
+    if (filtCPM) {
+    tempStr <- paste("Genes without more than", opt$cmpReq,
+                                     "CPM in at least", opt$sampleReq, "samples are insignificant",
+                                     "and filtered out.")
+    } else if (filtSmpCount) {
+        tempStr <- paste("Genes without more than", opt$cntReq,
+                                     "counts in at least", opt$sampleReq, "samples are insignificant",
+                                     "and filtered out.")
+    } else if (filtTotCount) {
+            tempStr <- paste("Genes without more than", opt$cntReq,
+                                     "counts, after summing counts for all samples, are insignificant",
+                                     "and filtered out.")
+    }
+
+    ListItem(tempStr)
+    filterProp <- round(filteredCount/preFilterCount*100, digits=2)
+    tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
+                                     "%) genes were filtered out for low expression.")
+    ListItem(tempStr)
 }
-if (pAdjOpt!="none") {
-  if (pAdjOpt=="BH" || pAdjOpt=="BY") {
-    tempStr <- paste0("MA-Plot highlighted genes are significant at FDR ",
-                      "of ", pValReq," and exhibit log2-fold-change of at ", 
-                      "least ", lfcReq, ".")
-    ListItem(tempStr)
-  } else if (pAdjOpt=="holm") {
-    tempStr <- paste0("MA-Plot highlighted genes are significant at adjusted ",
-                      "p-value of ", pValReq,"  by the Holm(1979) ",
-                      "method, and exhibit log2-fold-change of at least ", 
-                      lfcReq, ".")
-    ListItem(tempStr)
-  }
+ListItem(opt$normOpt, " was the method used to normalise library sizes.")
+if (wantTrend) {
+    ListItem("The limma-trend method was used.")
+} else {
+    ListItem("The limma-voom method was used.")
+}
+if (wantWeight) {
+    ListItem("Weights were applied to samples.")
 } else {
-  tempStr <- paste0("MA-Plot highlighted genes are significant at p-value ",
-                    "of ", pValReq," and exhibit log2-fold-change of at ", 
-                    "least ", lfcReq, ".")
-  ListItem(tempStr)
+    ListItem("Weights were not applied to samples.")
+}
+if (wantRobust) {
+    ListItem("eBayes was used with robust settings (robust=TRUE).")
+}
+if (opt$pAdjOpt!="none") {
+    if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") {
+        tempStr <- paste0("MA-Plot highlighted genes are significant at FDR ",
+                        "of ", opt$pValReq," and exhibit log2-fold-change of at ",
+                        "least ", opt$lfcReq, ".")
+        ListItem(tempStr)
+    } else if (opt$pAdjOpt=="holm") {
+        tempStr <- paste0("MA-Plot highlighted genes are significant at adjusted ",
+                        "p-value of ", opt$pValReq,"  by the Holm(1979) ",
+                        "method, and exhibit log2-fold-change of at least ",
+                        opt$lfcReq, ".")
+        ListItem(tempStr)
+    }
+  } else {
+        tempStr <- paste0("MA-Plot highlighted genes are significant at p-value ",
+                      "of ", opt$pValReq," and exhibit log2-fold-change of at ",
+                      "least ", opt$lfcReq, ".")
+        ListItem(tempStr)
 }
 cata("</ul>\n")
 
@@ -570,19 +753,18 @@
 TableHeadItem("SampleID")
 TableHeadItem(names(factors)[1]," (Primary Factor)")
 
-  if (ncol(factors) > 1) {
-
+if (ncol(factors) > 1) {
     for (i in names(factors)[2:length(names(factors))]) {
-      TableHeadItem(i)
+        TableHeadItem(i)
     }
     cata("</tr>\n")
-  }
+}
 
 for (i in 1:nrow(factors)) {
-  cata("<tr>\n")
-  TableHeadItem(row.names(factors)[i])
-  for (j in 1:ncol(factors)) {
-    TableItem(as.character(unmake.names(factors[i, j])))
+    cata("<tr>\n")
+    TableHeadItem(row.names(factors)[i])
+    for (j in 1:ncol(factors)) {
+        TableItem(as.character(unmake.names(factors[i, j])))
   }
   cata("</tr>\n")
 }
@@ -638,7 +820,7 @@
 cit[10] <- paste("Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:",
                 "precision weights unlock linear model analysis tools for",
                 "RNA-seq read counts. Genome Biology 15, R29.")
-cit[11] <- paste("Ritchie ME, Diyagama D, Neilson J, van Laar R,", 
+cit[11] <- paste("Ritchie ME, Diyagama D, Neilson J, van Laar R,",
                 "Dobrovic A, Holloway A and Smyth GK (2006).",
                 "Empirical array quality weights for microarray data.",
                 "BMC Bioinformatics 7, Article 261.")
@@ -667,9 +849,9 @@
 cata("<p>Please report problems or suggestions to: su.s@wehi.edu.au</p>\n")
 
 for (i in 1:nrow(linkData)) {
-  if (grepl("session_info", linkData$Link[i])) {
-    HtmlLink(linkData$Link[i], linkData$Label[i])
-  }
+    if (grepl("session_info", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+    }
 }
 
 cata("<table border=\"0\">\n")
author	iuc
date	Wed, 31 Jan 2018 12:45:42 -0500
parents	a330ddf43861
children	a61a6e62e91f