Mercurial > repos > iuc > limma_voom
diff test-data/out_rscript.txt @ 4:a61a6e62e91f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 6a458881c0819b75e55e64b3f494679d43bb9ee8
author | iuc |
---|---|
date | Sun, 29 Apr 2018 17:36:42 -0400 |
parents | |
children | d8a55b5f0de0 |
line wrap: on
line diff
# This tool takes in a matrix of feature counts as well as gene annotations and
# outputs a table of top expressions as well as various plots for differential
# expression analysis
#
# ARGS: htmlPath", "R", 1, "character"      -Path to html file linking to other outputs
#       outPath", "o", 1, "character"       -Path to folder to write all output to
#       filesPath", "j", 2, "character"     -JSON list object if multiple files input
#       matrixPath", "m", 2, "character"    -Path to count matrix
#       factFile", "f", 2, "character"      -Path to factor information file
#       factInput", "i", 2, "character"     -String containing factors if manually input
#       annoPath", "a", 2, "character"      -Path to input containing gene annotations
#       contrastData", "C", 1, "character"  -String containing contrasts of interest
#       cpmReq", "c", 1, "double"           -Float specifying cpm requirement
#       totReq", "y", 0, "logical"          -Flag specifying if total count filtering is used
#       cntReq", "z", 1, "integer"          -Integer specifying minimum count requirement
#       sampleReq", "s", 1, "integer"       -Integer specifying minimum number of samples meeting the filter
#       normCounts", "x", 0, "logical"      -Flag specifying if normalised counts should be output
#       rdaOpt", "r", 0, "logical"          -Flag specifying if RData should be output
#       lfcReq", "l", 1, "double"           -Float specifying the log-fold-change requirement
#       pValReq", "p", 1, "double"          -Float specifying the p-value requirement
#       pAdjOpt", "d", 1, "character"       -String specifying the p-value adjustment method
#       normOpt", "n", 1, "character"       -String specifying type of normalisation used
#       robOpt", "b", 0, "logical"          -Flag specifying if robust options should be used
#       trend", "t", 1, "double"            -Float for prior.count if limma-trend is used instead of voom
#       weightOpt", "w", 0, "logical"       -Flag specifying if voomWithQualityWeights should be used
#
# OUT:
#       MDS Plot
#       Voom/SA plot
#       MD Plot
#       Expression Table
#       HTML file linking to the outputs
# Optional:
#       Normalised counts Table
#       RData file
#
#
# Author: Shian Su - registertonysu@gmail.com - Jan 2014
# Modified by: Maria Doyle - Jun 2017, Jan 2018

# Record starting time
timeStart <- as.character(Sys.time())

# Load all required libraries
library(methods, quietly=TRUE, warn.conflicts=FALSE)
library(statmod, quietly=TRUE, warn.conflicts=FALSE)
library(splines, quietly=TRUE, warn.conflicts=FALSE)
library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
library(limma, quietly=TRUE, warn.conflicts=FALSE)
library(scales, quietly=TRUE, warn.conflicts=FALSE)
library(getopt, quietly=TRUE, warn.conflicts=FALSE)

if (packageVersion("limma") < "3.20.1") {
    stop("Please update 'limma' to version >= 3.20.1 to run this tool")
}

################################################################################
### Function Declaration
################################################################################
# Function to sanitise contrast equations so there are no whitespaces
# surrounding the arithmetic operators, leading or trailing whitespace
sanitiseEquation <- function(equation) {
    equation <- gsub(" *[+] *", "+", equation)
    equation <- gsub(" *[-] *", "-", equation)
    equation <- gsub(" *[/] *", "/", equation)
    equation <- gsub(" *[*] *", "*", equation)
    equation <- gsub("^\\s+|\\s+$", "", equation)
    return(equation)
}

# Function to sanitise group information (trim whitespace around commas/ends)
sanitiseGroups <- function(string) {
    string <- gsub(" *[,] *", ",", string)
    string <- gsub("^\\s+|\\s+$", "", string)
    return(string)
}

# Function to change periods to whitespace in a string (inverse of make.names
# for display purposes)
unmake.names <- function(string) {
    string <- gsub(".", " ", string, fixed=TRUE)
    return(string)
}

# Generate output folder and paths
makeOut <- function(filename) {
    return(paste0(opt$outPath, "/", filename))
}

# Generating design information: "factors$<name>" accessor strings used when
# building the model formula
pasteListName <- function(string) {
    return(paste0("factors$", string))
}

# Create cata function: default path set, default separator empty and appending
# true by default (Ripped straight from the cat function with altered argument
# defaults)
cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL,
                 append = TRUE) {
    if (is.character(file))
        if (file == "")
            file <- stdout()
        else if (substring(file, 1L, 1L) == "|") {
            file <- pipe(substring(file, 2L), "w")
            on.exit(close(file))
        }
        else {
            file <- file(file, ifelse(append, "a", "w"))
            on.exit(close(file))
        }
    .Internal(cat(list(...), file, sep, fill, labels, append))
}

# Function to write code for html head and title
HtmlHead <- function(title) {
    cata("<head>\n")
    cata("<title>", title, "</title>\n")
    cata("</head>\n")
}

# Function to write code for html links
HtmlLink <- function(address, label=address) {
    cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
}

# Function to write code for html images
HtmlImage <- function(source, label=source, height=600, width=600) {
    cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
    cata("\" width=\"", width, "\"/>\n")
}

# Function to write code for html list items
ListItem <- function(...) {
    cata("<li>", ..., "</li>\n")
}

TableItem <- function(...) {
    cata("<td>", ..., "</td>\n")
}

TableHeadItem <- function(...) {
    cata("<th>", ..., "</th>\n")
}

################################################################################
### Input Processing
################################################################################

# Collect arguments from command line
args <- commandArgs(trailingOnly=TRUE)

# Get options, using the spec as defined by the enclosed list.
# Read the options from the default: commandArgs(TRUE).
spec <- matrix(c(
    "htmlPath", "R", 1, "character",
    "outPath", "o", 1, "character",
    "filesPath", "j", 2, "character",
    "matrixPath", "m", 2, "character",
    "factFile", "f", 2, "character",
    "factInput", "i", 2, "character",
    "annoPath", "a", 2, "character",
    "contrastData", "C", 1, "character",
    "cpmReq", "c", 1, "double",
    "totReq", "y", 0, "logical",
    "cntReq", "z", 1, "integer",
    "sampleReq", "s", 1, "integer",
    "normCounts", "x", 0, "logical",
    "rdaOpt", "r", 0, "logical",
    "lfcReq", "l", 1, "double",
    "pValReq", "p", 1, "double",
    "pAdjOpt", "d", 1, "character",
    "normOpt", "n", 1, "character",
    "robOpt", "b", 0, "logical",
    "trend", "t", 1, "double",
    "weightOpt", "w", 0, "logical"),
    byrow=TRUE, ncol=4)
opt <- getopt(spec)


# A count source is mandatory: either a single matrix or a set of count files.
if (is.null(opt$matrixPath) && is.null(opt$filesPath)) {
    cat("A counts matrix (or a set of counts files) is required.\n")
    q(status=1)
}

# Derive boolean switches from the presence/absence of optional flags
filtCPM <- !is.null(opt$cpmReq)

# Per-sample count filtering requires both a count and a sample threshold
filtSmpCount <- !(is.null(opt$cntReq) || is.null(opt$sampleReq))

filtTotCount <- !is.null(opt$totReq)

wantRda <- !is.null(opt$rdaOpt)

haveAnno <- !is.null(opt$annoPath)

wantNorm <- !is.null(opt$normCounts)

wantRobust <- !is.null(opt$robOpt)

wantWeight <- !is.null(opt$weightOpt)

if (is.null(opt$trend)) {
    wantTrend <- FALSE
    deMethod <- "limma-voom"
} else {
    wantTrend <- TRUE
    deMethod <- "limma-trend"
    priorCount <- opt$trend
}


if (!is.null(opt$filesPath)) {
    # Process the separate count files (adapted from DESeq2 wrapper)
    library("rjson")
    parser <- newJSONParser()
    parser$addData(opt$filesPath)
    factorList <- parser$getObject()
    factors <- sapply(factorList, function(x) x[[1]])
    filenamesIn <- unname(unlist(factorList[[1]][[2]]))
    sampleTable <- data.frame(sample=basename(filenamesIn),
                              filename=filenamesIn,
                              row.names=filenamesIn,
                              stringsAsFactors=FALSE)
    for (factor in factorList) {
        factorName <- factor[[1]]
        sampleTable[[factorName]] <- character(nrow(sampleTable))
        lvls <- sapply(factor[[2]], function(x) names(x))
        for (i in seq_along(factor[[2]])) {
            files <- factor[[2]][[i]][[1]]
            sampleTable[files, factorName] <- lvls[i]
        }
        sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
    }
    rownames(sampleTable) <- sampleTable$sample
    rem <- c("sample", "filename")
    factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE]

    # read in count files and create single table
    countfiles <- lapply(sampleTable$filename, function(x) {read.delim(x, row.names=1)})
    counts <- do.call("cbind", countfiles)

} else {
    # Process the single count matrix; first column holds the gene ids
    counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
    row.names(counts) <- counts[, 1]
    counts <- counts[, -1]
    countsRows <- nrow(counts)

    # Process factors
    if (is.null(opt$factInput)) {
        factorData <- read.table(opt$factFile, header=TRUE, sep="\t")
        # First column of the factor file is the sample id; drop it
        factors <- factorData[, -1, drop=FALSE]
    } else {
        # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,...
        # separated by "|". The first factor is the Primary Factor.
        factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
        factorData <- list()
        for (fact in factors) {
            newFact <- unlist(strsplit(fact, split="::"))
            factorData <- rbind(factorData, newFact)
        }

        # Set the row names to be the name of the factor and delete first row
        row.names(factorData) <- factorData[, 1]
        factorData <- factorData[, -1]
        factorData <- sapply(factorData, sanitiseGroups)
        factorData <- sapply(factorData, strsplit, split=",")
        factorData <- sapply(factorData, make.names)
        # Transform factor data into data frame of R factor objects
        factors <- data.frame(factorData)
    }
}

# if annotation file provided
if (haveAnno) {
    geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
}

# Create output directory
dir.create(opt$outPath, showWarnings=FALSE)

# Split up contrasts separated by comma into a vector then sanitise
contrastData <- unlist(strsplit(opt$contrastData, split=","))
contrastData <- sanitiseEquation(contrastData)
contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)


mdsOutPdf <- makeOut("mdsplot_nonorm.pdf")
mdsOutPng <- makeOut("mdsplot_nonorm.png")
nmdsOutPdf <- makeOut("mdsplot.pdf")
nmdsOutPng <- makeOut("mdsplot.png")
maOutPdf <- character()  # Initialise character vector
maOutPng <- character()
topOut <- character()
for (i in seq_along(contrastData)) {
    maOutPdf[i] <- makeOut(paste0("maplot_", contrastData[i], ".pdf"))
    maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png"))
    topOut[i] <- makeOut(paste0(deMethod, "_", contrastData[i], ".tsv"))
}
normOut <- makeOut(paste0(deMethod, "_normcounts.tsv"))
rdaOut <- makeOut(paste0(deMethod, "_analysis.RData"))
sessionOut <- makeOut("session_info.txt")

# Initialise data for html links and images, data frame with columns Label and
# Link
linkData <- data.frame(Label=character(), Link=character(),
                       stringsAsFactors=FALSE)
imageData <- data.frame(Label=character(), Link=character(),
                        stringsAsFactors=FALSE)

# Initialise vectors for storage of up/down/neutral regulated counts
upCount <- numeric()
downCount <- numeric()
flatCount <- numeric()
################################################################################
### Data Processing
################################################################################

# Extract counts and annotation data
print("Extracting counts")
data <- list()
data$counts <- counts
if (haveAnno) {
    # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
    annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ]
    data$genes <- annoord
} else {
    data$genes <- data.frame(GeneID=row.names(counts))
}

# If filter criteria set, filter out genes that do not have a required cpm/counts
# in a required number of samples. Default is no filtering
preFilterCount <- nrow(data$counts)

if (filtCPM || filtSmpCount || filtTotCount) {

    if (filtTotCount) {
        keep <- rowSums(data$counts) >= opt$cntReq
    } else if (filtSmpCount) {
        keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
    } else if (filtCPM) {
        keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
    }

    data$counts <- data$counts[keep, ]
    data$genes <- data$genes[keep, , drop=FALSE]
}

postFilterCount <- nrow(data$counts)
filteredCount <- preFilterCount - postFilterCount

# Creating naming data
samplenames <- colnames(data$counts)
sampleanno <- data.frame("sampleID"=samplenames, factors)


# Generating the DGEList object "data"
print("Generating DGEList object")
data$samples <- sampleanno
data$samples$lib.size <- colSums(data$counts)
data$samples$norm.factors <- 1
row.names(data$samples) <- colnames(data$counts)
data <- new("DGEList", data)

print("Generating Design")
# Name rows of factors according to their sample
row.names(factors) <- names(data$counts)
factorList <- sapply(names(factors), pasteListName)
# Build a no-intercept model formula from all supplied factors
formula <- "~0"
for (i in seq_along(factorList)) {
    formula <- paste(formula, factorList[i], sep="+")
}
formula <- formula(formula)
design <- model.matrix(formula)
# Strip the "factors$<name>" prefixes so design columns match contrast names
for (i in seq_along(factorList)) {
    colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE)
}

# Calculating normalising factors
print("Calculating Normalisation Factors")
data <- calcNormFactors(data, method=opt$normOpt)

# Generate contrasts information
print("Generating Contrasts")
contrasts <- makeContrasts(contrasts=contrastData, levels=design)

################################################################################
### Data Output
################################################################################
# Plot MDS
print("Generating MDS plot")
labels <- names(counts)
png(mdsOutPng, width=600, height=600)
# Currently only using a single factor
plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (unnormalised)")
imageData[1, ] <- c("MDS Plot (unnormalised)", "mdsplot_nonorm.png")
invisible(dev.off())

pdf(mdsOutPdf)
plotMDS(data, labels=labels, cex=0.5)
linkData[1, ] <- c("MDS Plot (unnormalised).pdf", "mdsplot_nonorm.pdf")
invisible(dev.off())

if (wantTrend) {
    # limma-trend approach
    logCPM <- cpm(data, log=TRUE, prior.count=opt$trend)
    fit <- lmFit(logCPM, design)
    fit$genes <- data$genes
    fit <- contrasts.fit(fit, contrasts)
    if (wantRobust) {
        fit <- eBayes(fit, trend=TRUE, robust=TRUE)
    } else {
        fit <- eBayes(fit, trend=TRUE, robust=FALSE)
    }
    # plot fit with plotSA
    saOutPng <- makeOut("saplot.png")
    saOutPdf <- makeOut("saplot.pdf")

    png(saOutPng, width=600, height=600)
    plotSA(fit, main="SA Plot")
    imgName <- "SA Plot.png"
    imgAddr <- "saplot.png"
    imageData <- rbind(imageData, c(imgName, imgAddr))
    invisible(dev.off())

    pdf(saOutPdf, width=14)
    plotSA(fit, main="SA Plot")
    linkName <- paste0("SA Plot.pdf")
    linkAddr <- paste0("saplot.pdf")
    linkData <- rbind(linkData, c(linkName, linkAddr))
    invisible(dev.off())

    plotData <- logCPM

    # Save normalised counts (log2cpm)
    if (wantNorm) {
        write.table(logCPM, file=normOut, row.names=TRUE, sep="\t", quote=FALSE)
        linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv"))))
    }
} else {
    # limma-voom approach
    voomOutPdf <- makeOut("voomplot.pdf")
    voomOutPng <- makeOut("voomplot.png")

    if (wantWeight) {
        # Creating voom data object and plot
        png(voomOutPng, width=1000, height=600)
        vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
        imgName <- "Voom Plot.png"
        imgAddr <- "voomplot.png"
        imageData <- rbind(imageData, c(imgName, imgAddr))
        invisible(dev.off())

        pdf(voomOutPdf, width=14)
        vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
        linkName <- paste0("Voom Plot.pdf")
        linkAddr <- paste0("voomplot.pdf")
        linkData <- rbind(linkData, c(linkName, linkAddr))
        invisible(dev.off())

        # Generating fit data and top table with weights
        wts <- vData$weights
        voomFit <- lmFit(vData, design, weights=wts)

    } else {
        # Creating voom data object and plot
        png(voomOutPng, width=600, height=600)
        vData <- voom(data, design=design, plot=TRUE)
        imgName <- "Voom Plot"
        imgAddr <- "voomplot.png"
        imageData <- rbind(imageData, c(imgName, imgAddr))
        invisible(dev.off())

        pdf(voomOutPdf)
        vData <- voom(data, design=design, plot=TRUE)
        linkName <- paste0("Voom Plot.pdf")
        linkAddr <- paste0("voomplot.pdf")
        linkData <- rbind(linkData, c(linkName, linkAddr))
        invisible(dev.off())

        # Generate voom fit
        voomFit <- lmFit(vData, design)
    }

    # Save normalised counts (log2cpm)
    if (wantNorm) {
        norm_counts <- data.frame(vData$genes, vData$E)
        write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE)
        linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv"))))
    }

    # Fit linear model and estimate dispersion with eBayes
    voomFit <- contrasts.fit(voomFit, contrasts)
    if (wantRobust) {
        fit <- eBayes(voomFit, robust=TRUE)
    } else {
        fit <- eBayes(voomFit, robust=FALSE)
    }
    plotData <- vData
}

print("Generating normalised MDS plot")
png(nmdsOutPng, width=600, height=600)
# Currently only using a single factor
plotMDS(plotData, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (normalised)")
imgName <- "MDS Plot (normalised)"
imgAddr <- "mdsplot.png"
imageData <- rbind(imageData, c(imgName, imgAddr))
invisible(dev.off())

pdf(nmdsOutPdf)
plotMDS(plotData, labels=labels, cex=0.5)
linkName <- paste0("MDS Plot (normalised).pdf")
linkAddr <- paste0("mdsplot.pdf")
linkData <- rbind(linkData, c(linkName, linkAddr))
invisible(dev.off())


print("Generating DE results")
status <- decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
                      lfc=opt$lfcReq)
sumStatus <- summary(status)

for (i in seq_along(contrastData)) {
    # Collect counts for differential expression
    upCount[i] <- sumStatus["Up", i]
    downCount[i] <- sumStatus["Down", i]
    flatCount[i] <- sumStatus["NotSig", i]

    # Write top expressions table
    top <- topTable(fit, coef=i, number=Inf, sort.by="P")
    write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)

    linkName <- paste0(deMethod, "_", contrastData[i], ".tsv")
    linkAddr <- paste0(deMethod, "_", contrastData[i], ".tsv")
    linkData <- rbind(linkData, c(linkName, linkAddr))

    # Plot MA (log ratios vs mean average) using limma package on weighted
    pdf(maOutPdf[i])
    limma::plotMD(fit, status=status, coef=i,
                  main=paste("MA Plot:", unmake.names(contrastData[i])),
                  col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
                  xlab="Average Expression", ylab="logFC")

    abline(h=0, col="grey", lty=2)

    linkName <- paste0("MA Plot_", contrastData[i], " (.pdf)")
    linkAddr <- paste0("maplot_", contrastData[i], ".pdf")
    linkData <- rbind(linkData, c(linkName, linkAddr))
    invisible(dev.off())

    png(maOutPng[i], height=600, width=600)
    limma::plotMD(fit, status=status, coef=i,
                  main=paste("MA Plot:", unmake.names(contrastData[i])),
                  col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
                  xlab="Average Expression", ylab="logFC")

    abline(h=0, col="grey", lty=2)

    imgName <- paste0("MA Plot_", contrastData[i])
    imgAddr <- paste0("maplot_", contrastData[i], ".png")
    imageData <- rbind(imageData, c(imgName, imgAddr))
    invisible(dev.off())
}
sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
row.names(sigDiff) <- contrastData

# Save relevant items as rda object
if (wantRda) {
    print("Saving RData")
    if (wantWeight) {
        save(data, status, plotData, labels, factors, wts, fit, top, contrasts,
             design,
             file=rdaOut, ascii=TRUE)
    } else {
        save(data, status, plotData, labels, factors, fit, top, contrasts, design,
             file=rdaOut, ascii=TRUE)
    }
    linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData"))))
}

# Record session info
writeLines(capture.output(sessionInfo()), sessionOut)
linkData <- rbind(linkData, c("Session Info", "session_info.txt"))

# Record ending time and calculate total run time
timeEnd <- as.character(Sys.time())
timeTaken <- capture.output(round(difftime(timeEnd, timeStart), digits=3))
timeTaken <- gsub("Time difference of ", "", timeTaken, fixed=TRUE)
################################################################################
### HTML Generation
################################################################################

# Clear file
cat("", file=opt$htmlPath)

cata("<html>\n")

cata("<body>\n")
cata("<h3>Limma Analysis Output:</h3>\n")
cata("Links to PDF copies of plots are in 'Plots' section below />\n")
if (wantWeight) {
    HtmlImage(imageData$Link[1], imageData$Label[1], width=1000)
} else {
    HtmlImage(imageData$Link[1], imageData$Label[1])
}

for (i in 2:nrow(imageData)) {
    HtmlImage(imageData$Link[i], imageData$Label[i])
}

cata("<h4>Differential Expression Counts:</h4>\n")

cata("<table border=\"1\" cellpadding=\"4\">\n")
cata("<tr>\n")
TableItem()
for (i in colnames(sigDiff)) {
    TableHeadItem(i)
}
cata("</tr>\n")
for (i in seq_len(nrow(sigDiff))) {
    cata("<tr>\n")
    TableHeadItem(unmake.names(row.names(sigDiff)[i]))
    for (j in seq_len(ncol(sigDiff))) {
        TableItem(as.character(sigDiff[i, j]))
    }
    cata("</tr>\n")
}
cata("</table>")

cata("<h4>Plots:</h4>\n")
for (i in seq_len(nrow(linkData))) {
    if (grepl(".pdf", linkData$Link[i])) {
        HtmlLink(linkData$Link[i], linkData$Label[i])
    }
}

cata("<h4>Tables:</h4>\n")
for (i in seq_len(nrow(linkData))) {
    if (grepl(".tsv", linkData$Link[i])) {
        HtmlLink(linkData$Link[i], linkData$Label[i])
    }
}

if (wantRda) {
    cata("<h4>R Data Object:</h4>\n")
    for (i in seq_len(nrow(linkData))) {
        if (grepl(".RData", linkData$Link[i])) {
            HtmlLink(linkData$Link[i], linkData$Label[i])
        }
    }
}

cata("<p>Alt-click links to download file.</p>\n")
cata("<p>Click floppy disc icon associated history item to download ")
cata("all files.</p>\n")
cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n")

cata("<h4>Additional Information</h4>\n")
cata("<ul>\n")

if (filtCPM || filtSmpCount || filtTotCount) {
    if (filtCPM) {
        # NOTE: was opt$cmpReq (undefined, printed as empty) — fixed to opt$cpmReq
        tempStr <- paste("Genes without more than", opt$cpmReq,
                         "CPM in at least", opt$sampleReq, "samples are insignificant",
                         "and filtered out.")
    } else if (filtSmpCount) {
        tempStr <- paste("Genes without more than", opt$cntReq,
                         "counts in at least", opt$sampleReq, "samples are insignificant",
                         "and filtered out.")
    } else if (filtTotCount) {
        tempStr <- paste("Genes without more than", opt$cntReq,
                         "counts, after summing counts for all samples, are insignificant",
                         "and filtered out.")
    }

    ListItem(tempStr)
    filterProp <- round(filteredCount / preFilterCount * 100, digits=2)
    tempStr <- paste0(filteredCount, " of ", preFilterCount, " (", filterProp,
                      "%) genes were filtered out for low expression.")
    ListItem(tempStr)
}
ListItem(opt$normOpt, " was the method used to normalise library sizes.")
if (wantTrend) {
    ListItem("The limma-trend method was used.")
} else {
    ListItem("The limma-voom method was used.")
}
if (wantWeight) {
    ListItem("Weights were applied to samples.")
} else {
    ListItem("Weights were not applied to samples.")
}
if (wantRobust) {
    ListItem("eBayes was used with robust settings (robust=TRUE).")
}
if (opt$pAdjOpt != "none") {
    if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") {
        tempStr <- paste0("MA-Plot highlighted genes are significant at FDR ",
                          "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
                          "least ", opt$lfcReq, ".")
        ListItem(tempStr)
    } else if (opt$pAdjOpt == "holm") {
        tempStr <- paste0("MA-Plot highlighted genes are significant at adjusted ",
                          "p-value of ", opt$pValReq, " by the Holm(1979) ",
                          "method, and exhibit log2-fold-change of at least ",
                          opt$lfcReq, ".")
        ListItem(tempStr)
    }
} else {
    tempStr <- paste0("MA-Plot highlighted genes are significant at p-value ",
                      "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
                      "least ", opt$lfcReq, ".")
    ListItem(tempStr)
}
cata("</ul>\n")

cata("<h4>Summary of experimental data:</h4>\n")

cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\n")

cata("<table border=\"1\" cellpadding=\"3\">\n")
cata("<tr>\n")
TableHeadItem("SampleID")
TableHeadItem(names(factors)[1], " (Primary Factor)")

if (ncol(factors) > 1) {
    for (i in names(factors)[2:length(names(factors))]) {
        TableHeadItem(i)
    }
}
# Close the header row unconditionally (was only closed when >1 factor)
cata("</tr>\n")

for (i in seq_len(nrow(factors))) {
    cata("<tr>\n")
    TableHeadItem(row.names(factors)[i])
    for (j in seq_len(ncol(factors))) {
        TableItem(as.character(unmake.names(factors[i, j])))
    }
    cata("</tr>\n")
}
cata("</table>")

cit <- character()
link <- character()
link[1] <- paste0("<a href=\"",
                  "http://www.bioconductor.org/packages/release/bioc/",
                  "vignettes/limma/inst/doc/usersguide.pdf",
                  "\">", "limma User's Guide", "</a>.")

link[2] <- paste0("<a href=\"",
                  "http://www.bioconductor.org/packages/release/bioc/",
                  "vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf",
                  "\">", "edgeR User's Guide", "</a>")

cit[1] <- paste("Please cite the following paper for this tool:")

cit[2] <- paste("Liu R, Holik AZ, Su S, Jansz N, Chen K, Leong HS, Blewitt ME,",
                "Asselin-Labat ML, Smyth GK, Ritchie ME (2015). Why weight? ",
                "Modelling sample and observational level variability improves power ",
                "in RNA-seq analyses. Nucleic Acids Research, 43(15), e97.")

cit[3] <- paste("Please cite the paper below for the limma software itself.",
                "Please also try to cite the appropriate methodology articles",
                "that describe the statistical methods implemented in limma,",
                "depending on which limma functions you are using. The",
                "methodology articles are listed in Section 2.1 of the",
                link[1],
                "Cite no. 3 only if sample weights were used.")
cit[4] <- paste("Smyth GK (2005). Limma: linear models for microarray data.",
                "In: 'Bioinformatics and Computational Biology Solutions using",
                "R and Bioconductor'. R. Gentleman, V. Carey, S. Dudoit, R.",
                "Irizarry, W. Huber (eds), Springer, New York, pages 397-420.")
cit[5] <- paste("Please cite the first paper for the software itself and the",
                "other papers for the various original statistical methods",
                "implemented in edgeR. See Section 1.2 in the", link[2],
                "for more detail.")
cit[6] <- paste("Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a",
                "Bioconductor package for differential expression analysis",
                "of digital gene expression data. Bioinformatics 26, 139-140")
cit[7] <- paste("Robinson MD and Smyth GK (2007). Moderated statistical tests",
                "for assessing differences in tag abundance. Bioinformatics",
                "23, 2881-2887")
cit[8] <- paste("Robinson MD and Smyth GK (2008). Small-sample estimation of",
                "negative binomial dispersion, with applications to SAGE data.",
                "Biostatistics, 9, 321-332")
cit[9] <- paste("McCarthy DJ, Chen Y and Smyth GK (2012). Differential",
                "expression analysis of multifactor RNA-Seq experiments with",
                "respect to biological variation. Nucleic Acids Research 40,",
                "4288-4297")
cit[10] <- paste("Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:",
                 "precision weights unlock linear model analysis tools for",
                 "RNA-seq read counts. Genome Biology 15, R29.")
cit[11] <- paste("Ritchie ME, Diyagama D, Neilson J, van Laar R,",
                 "Dobrovic A, Holloway A and Smyth GK (2006).",
                 "Empirical array quality weights for microarray data.",
                 "BMC Bioinformatics 7, Article 261.")
cata("<h3>Citations</h3>\n")
cata(cit[1], "\n")
cata("<br>\n")
cata(cit[2], "\n")

cata("<h4>limma</h4>\n")
cata(cit[3], "\n")
cata("<ol>\n")
ListItem(cit[4])
ListItem(cit[10])
ListItem(cit[11])
cata("</ol>\n")

cata("<h4>edgeR</h4>\n")
cata(cit[5], "\n")
cata("<ol>\n")
ListItem(cit[6])
ListItem(cit[7])
ListItem(cit[8])
ListItem(cit[9])
cata("</ol>\n")

cata("<p>Please report problems or suggestions to: su.s@wehi.edu.au</p>\n")

for (i in seq_len(nrow(linkData))) {
    if (grepl("session_info", linkData$Link[i])) {
        HtmlLink(linkData$Link[i], linkData$Label[i])
    }
}

cata("<table border=\"0\">\n")
cata("<tr>\n")
TableItem("Task started at:"); TableItem(timeStart)
cata("</tr>\n")
cata("<tr>\n")
TableItem("Task ended at:"); TableItem(timeEnd)
cata("</tr>\n")
cata("<tr>\n")
TableItem("Task run time:"); TableItem(timeTaken)
# Close the final row (was an unclosed "<tr>")
cata("</tr>\n")
cata("</table>\n")

cata("</body>\n")
cata("</html>")