# HG changeset patch
# User iuc
# Date 1517420742 18000
# Node ID 38aab66ae5cbc72b9c0d9df5ab266a0028e50061
# Parent a330ddf4386195c2093e20cff7c76e382aecc0bb
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 1640914b9812b0482a3cf684f05465f8d9cfdc65
diff -r a330ddf43861 -r 38aab66ae5cb limma_voom.R
--- a/limma_voom.R Thu Sep 07 05:27:27 2017 -0400
+++ b/limma_voom.R Wed Jan 31 12:45:42 2018 -0500
@@ -2,31 +2,40 @@
# outputs a table of top expressions as well as various plots for differential
# expression analysis
#
-# ARGS: 1.countPath -Path to RData input containing counts
-# 2.annoPath -Path to input containing gene annotations
-# 3.htmlPath -Path to html file linking to other outputs
-# 4.outPath -Path to folder to write all output to
-# 5.rdaOpt -String specifying if RData should be saved
-# 6.normOpt -String specifying type of normalisation used
-# 7.weightOpt -String specifying usage of weights
-# 8.contrastData -String containing contrasts of interest
-# 9.cpmReq -Float specifying cpm requirement
-# 10.sampleReq -Integer specifying cpm requirement
-# 11.pAdjOpt -String specifying the p-value adjustment method
-# 12.pValReq -Float specifying the p-value requirement
-# 13.lfcReq -Float specifying the log-fold-change requirement
-# 14.normCounts -String specifying if normalised counts should be output
-# 15.factPath -Path to factor information file
-# 16.factorData -Strings containing factor names and values if manually input
+# ARGS: htmlPath", "R", 1, "character" -Path to html file linking to other outputs
+# outPath", "o", 1, "character" -Path to folder to write all output to
+# filesPath", "j", 2, "character" -JSON list object if multiple files input
+# matrixPath", "m", 2, "character" -Path to count matrix
+# factFile", "f", 2, "character" -Path to factor information file
+# factInput", "i", 2, "character" -String containing factors if manually input
+# annoPath", "a", 2, "character" -Path to input containing gene annotations
+# contrastData", "C", 1, "character" -String containing contrasts of interest
+# cpmReq", "c", 2, "double" -Float specifying cpm requirement
+# cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement
+# sampleReq", "s", 2, "integer" -Integer specifying cpm requirement
+# normCounts", "x", 0, "logical" -String specifying if normalised counts should be output
+# rdaOpt", "r", 0, "logical" -String specifying if RData should be output
+# lfcReq", "l", 1, "double" -Float specifying the log-fold-change requirement
+# pValReq", "p", 1, "double" -Float specifying the p-value requirement
+# pAdjOpt", "d", 1, "character" -String specifying the p-value adjustment method
+# normOpt", "n", 1, "character" -String specifying type of normalisation used
+# robOpt", "b", 0, "logical" -String specifying if robust options should be used
+# trend", "t", 1, "double" -Float for prior.count if limma-trend is used instead of voom
+# weightOpt", "w", 0, "logical" -String specifying if voomWithQualityWeights should be used
#
-# OUT: Voom Plot
-# BCV Plot
-# MA Plot
+# OUT:
+# MDS Plot
+# Voom/SA plot
+# MD Plot
# Expression Table
# HTML file linking to the ouputs
+# Optional:
+# Normalised counts Table
+# RData file
+#
#
# Author: Shian Su - registertonysu@gmail.com - Jan 2014
-# Modified by: Maria Doyle - Jun 2017
+# Modified by: Maria Doyle - Jun 2017, Jan 2018
# Record starting time
timeStart <- as.character(Sys.time())
@@ -38,9 +47,10 @@
library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
library(limma, quietly=TRUE, warn.conflicts=FALSE)
library(scales, quietly=TRUE, warn.conflicts=FALSE)
+library(getopt, quietly=TRUE, warn.conflicts=FALSE)
if (packageVersion("limma") < "3.20.1") {
- stop("Please update 'limma' to version >= 3.20.1 to run this tool")
+ stop("Please update 'limma' to version >= 3.20.1 to run this tool")
}
################################################################################
@@ -49,185 +59,276 @@
# Function to sanitise contrast equations so there are no whitespaces
# surrounding the arithmetic operators, leading or trailing whitespace
sanitiseEquation <- function(equation) {
- equation <- gsub(" *[+] *", "+", equation)
- equation <- gsub(" *[-] *", "-", equation)
- equation <- gsub(" *[/] *", "/", equation)
- equation <- gsub(" *[*] *", "*", equation)
- equation <- gsub("^\\s+|\\s+$", "", equation)
- return(equation)
+ equation <- gsub(" *[+] *", "+", equation)
+ equation <- gsub(" *[-] *", "-", equation)
+ equation <- gsub(" *[/] *", "/", equation)
+ equation <- gsub(" *[*] *", "*", equation)
+ equation <- gsub("^\\s+|\\s+$", "", equation)
+ return(equation)
}
# Function to sanitise group information
sanitiseGroups <- function(string) {
- string <- gsub(" *[,] *", ",", string)
- string <- gsub("^\\s+|\\s+$", "", string)
- return(string)
+ string <- gsub(" *[,] *", ",", string)
+ string <- gsub("^\\s+|\\s+$", "", string)
+ return(string)
}
# Function to change periods to whitespace in a string
unmake.names <- function(string) {
- string <- gsub(".", " ", string, fixed=TRUE)
- return(string)
+ string <- gsub(".", " ", string, fixed=TRUE)
+ return(string)
}
# Generate output folder and paths
makeOut <- function(filename) {
- return(paste0(outPath, "/", filename))
+ return(paste0(opt$outPath, "/", filename))
}
# Generating design information
pasteListName <- function(string) {
- return(paste0("factors$", string))
+ return(paste0("factors$", string))
}
# Create cata function: default path set, default seperator empty and appending
# true by default (Ripped straight from the cat function with altered argument
# defaults)
-cata <- function(..., file = htmlPath, sep = "", fill = FALSE, labels = NULL,
- append = TRUE) {
- if (is.character(file))
- if (file == "")
- file <- stdout()
- else if (substring(file, 1L, 1L) == "|") {
- file <- pipe(substring(file, 2L), "w")
- on.exit(close(file))
- }
- else {
- file <- file(file, ifelse(append, "a", "w"))
- on.exit(close(file))
- }
- .Internal(cat(list(...), file, sep, fill, labels, append))
+cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL,
+ append = TRUE) {
+ if (is.character(file))
+ if (file == "")
+ file <- stdout()
+ else if (substring(file, 1L, 1L) == "|") {
+ file <- pipe(substring(file, 2L), "w")
+ on.exit(close(file))
+ }
+ else {
+ file <- file(file, ifelse(append, "a", "w"))
+ on.exit(close(file))
+ }
+ .Internal(cat(list(...), file, sep, fill, labels, append))
}
# Function to write code for html head and title
HtmlHead <- function(title) {
- cata("
\n")
- cata("", title, " \n")
- cata("\n")
+ cata("\n")
+ cata("", title, " \n")
+ cata("\n")
}
# Function to write code for html links
HtmlLink <- function(address, label=address) {
- cata("", label, " \n")
+ cata("", label, " \n")
}
# Function to write code for html images
HtmlImage <- function(source, label=source, height=600, width=600) {
- cata(" \n")
+ cata(" \n")
}
# Function to write code for html list items
ListItem <- function(...) {
- cata("", ..., " \n")
+ cata("", ..., " \n")
}
TableItem <- function(...) {
- cata("", ..., " \n")
+ cata("", ..., " \n")
}
TableHeadItem <- function(...) {
- cata("", ..., " \n")
+ cata("", ..., " \n")
}
################################################################################
### Input Processing
################################################################################
-# Collects arguments from command line
-argv <- commandArgs(TRUE)
+# Collect arguments from command line
+args <- commandArgs(trailingOnly=TRUE)
-# Grab arguments
-countPath <- as.character(argv[1])
-annoPath <- as.character(argv[2])
-htmlPath <- as.character(argv[3])
-outPath <- as.character(argv[4])
-rdaOpt <- as.character(argv[5])
-normOpt <- as.character(argv[6])
-weightOpt <- as.character(argv[7])
-contrastData <- as.character(argv[8])
-cpmReq <- as.numeric(argv[9])
-sampleReq <- as.numeric(argv[10])
-pAdjOpt <- as.character(argv[11])
-pValReq <- as.numeric(argv[12])
-lfcReq <- as.numeric(argv[13])
-normCounts <- as.character(argv[14])
-factPath <- as.character(argv[15])
-# Process factors
-if (as.character(argv[16])=="None") {
- factorData <- read.table(factPath, header=TRUE, sep="\t")
- factors <- factorData[,-1, drop=FALSE]
-} else {
- factorData <- list()
- for (i in 16:length(argv)) {
- newFact <- unlist(strsplit(as.character(argv[i]), split="::"))
- factorData <- rbind(factorData, newFact)
- } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
+# Get options, using the spec as defined by the enclosed list.
+# Read the options from the default: commandArgs(TRUE).
+spec <- matrix(c(
+ "htmlPath", "R", 1, "character",
+ "outPath", "o", 1, "character",
+ "filesPath", "j", 2, "character",
+ "matrixPath", "m", 2, "character",
+ "factFile", "f", 2, "character",
+ "factInput", "i", 2, "character",
+ "annoPath", "a", 2, "character",
+ "contrastData", "C", 1, "character",
+ "cpmReq", "c", 1, "double",
+ "totReq", "y", 0, "logical",
+ "cntReq", "z", 1, "integer",
+ "sampleReq", "s", 1, "integer",
+ "normCounts", "x", 0, "logical",
+ "rdaOpt", "r", 0, "logical",
+ "lfcReq", "l", 1, "double",
+ "pValReq", "p", 1, "double",
+ "pAdjOpt", "d", 1, "character",
+ "normOpt", "n", 1, "character",
+ "robOpt", "b", 0, "logical",
+ "trend", "t", 1, "double",
+ "weightOpt", "w", 0, "logical"),
+ byrow=TRUE, ncol=4)
+opt <- getopt(spec)
- # Set the row names to be the name of the factor and delete first row
- row.names(factorData) <- factorData[, 1]
- factorData <- factorData[, -1]
- factorData <- sapply(factorData, sanitiseGroups)
- factorData <- sapply(factorData, strsplit, split=",")
- factorData <- sapply(factorData, make.names)
- # Transform factor data into data frame of R factor objects
- factors <- data.frame(factorData)
+if (is.null(opt$matrixPath) & is.null(opt$filesPath)) {
+ cat("A counts matrix (or a set of counts files) is required.\n")
+ q(status=1)
+}
+
+if (is.null(opt$cpmReq)) {
+ filtCPM <- FALSE
+} else {
+ filtCPM <- TRUE
}
-# Process other arguments
-if (weightOpt=="yes") {
- wantWeight <- TRUE
+if (is.null(opt$cntReq) || is.null(opt$sampleReq)) {
+ filtSmpCount <- FALSE
+} else {
+ filtSmpCount <- TRUE
+}
+
+if (is.null(opt$totReq)) {
+ filtTotCount <- FALSE
} else {
- wantWeight <- FALSE
+ filtTotCount <- TRUE
+}
+
+if (is.null(opt$rdaOpt)) {
+ wantRda <- FALSE
+} else {
+ wantRda <- TRUE
+}
+
+if (is.null(opt$annoPath)) {
+ haveAnno <- FALSE
+} else {
+ haveAnno <- TRUE
}
-if (rdaOpt=="yes") {
- wantRda <- TRUE
+if (is.null(opt$normCounts)) {
+ wantNorm <- FALSE
} else {
- wantRda <- FALSE
+ wantNorm <- TRUE
+}
+
+if (is.null(opt$robOpt)) {
+ wantRobust <- FALSE
+} else {
+ wantRobust <- TRUE
}
-if (annoPath=="None") {
- haveAnno <- FALSE
+if (is.null(opt$weightOpt)) {
+ wantWeight <- FALSE
} else {
- haveAnno <- TRUE
+ wantWeight <- TRUE
}
-if (normCounts=="yes") {
- wantNorm <- TRUE
+if (is.null(opt$trend)) {
+ wantTrend <- FALSE
+ deMethod <- "limma-voom"
} else {
- wantNorm <- FALSE
+ wantTrend <- TRUE
+ deMethod <- "limma-trend"
+ priorCount <- opt$trend
}
+if (!is.null(opt$filesPath)) {
+ # Process the separate count files (adapted from DESeq2 wrapper)
+ library("rjson")
+ parser <- newJSONParser()
+ parser$addData(opt$filesPath)
+ factorList <- parser$getObject()
+ factors <- sapply(factorList, function(x) x[[1]])
+ filenamesIn <- unname(unlist(factorList[[1]][[2]]))
+ sampleTable <- data.frame(sample=basename(filenamesIn),
+ filename=filenamesIn,
+ row.names=filenamesIn,
+ stringsAsFactors=FALSE)
+ for (factor in factorList) {
+ factorName <- factor[[1]]
+ sampleTable[[factorName]] <- character(nrow(sampleTable))
+ lvls <- sapply(factor[[2]], function(x) names(x))
+ for (i in seq_along(factor[[2]])) {
+ files <- factor[[2]][[i]][[1]]
+ sampleTable[files,factorName] <- lvls[i]
+ }
+ sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
+ }
+ rownames(sampleTable) <- sampleTable$sample
+ rem <- c("sample","filename")
+ factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE]
+
+ #read in count files and create single table
+ countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)})
+ counts <- do.call("cbind", countfiles)
+
+} else {
+ # Process the single count matrix
+ counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
+ row.names(counts) <- counts[, 1]
+ counts <- counts[ , -1]
+ countsRows <- nrow(counts)
+
+ # Process factors
+ if (is.null(opt$factInput)) {
+ factorData <- read.table(opt$factFile, header=TRUE, sep="\t")
+ factors <- factorData[, -1, drop=FALSE]
+ } else {
+ factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
+ factorData <- list()
+ for (fact in factors) {
+ newFact <- unlist(strsplit(fact, split="::"))
+ factorData <- rbind(factorData, newFact)
+ } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
+
+ # Set the row names to be the name of the factor and delete first row
+ row.names(factorData) <- factorData[, 1]
+ factorData <- factorData[, -1]
+ factorData <- sapply(factorData, sanitiseGroups)
+ factorData <- sapply(factorData, strsplit, split=",")
+ factorData <- sapply(factorData, make.names)
+ # Transform factor data into data frame of R factor objects
+ factors <- data.frame(factorData)
+ }
+}
+
+ # if annotation file provided
+if (haveAnno) {
+ geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
+}
+
#Create output directory
-dir.create(outPath, showWarnings=FALSE)
+dir.create(opt$outPath, showWarnings=FALSE)
# Split up contrasts seperated by comma into a vector then sanitise
-contrastData <- unlist(strsplit(contrastData, split=","))
+contrastData <- unlist(strsplit(opt$contrastData, split=","))
contrastData <- sanitiseEquation(contrastData)
contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
-bcvOutPdf <- makeOut("bcvplot.pdf")
-bcvOutPng <- makeOut("bcvplot.png")
-mdsOutPdf <- makeOut("mdsplot.pdf")
-mdsOutPng <- makeOut("mdsplot.png")
-voomOutPdf <- makeOut("voomplot.pdf")
-voomOutPng <- makeOut("voomplot.png")
+
+mdsOutPdf <- makeOut("mdsplot_nonorm.pdf")
+mdsOutPng <- makeOut("mdsplot_nonorm.png")
+nmdsOutPdf <- makeOut("mdsplot.pdf")
+nmdsOutPng <- makeOut("mdsplot.png")
maOutPdf <- character() # Initialise character vector
maOutPng <- character()
topOut <- character()
for (i in 1:length(contrastData)) {
- maOutPdf[i] <- makeOut(paste0("maplot_", contrastData[i], ".pdf"))
- maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png"))
- topOut[i] <- makeOut(paste0("limma-voom_", contrastData[i], ".tsv"))
-} # Save output paths for each contrast as vectors
-normOut <- makeOut("limma-voom_normcounts.tsv")
-rdaOut <- makeOut("RData.rda")
+ maOutPdf[i] <- makeOut(paste0("maplot_", contrastData[i], ".pdf"))
+ maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png"))
+ topOut[i] <- makeOut(paste0(deMethod, "_", contrastData[i], ".tsv"))
+}
+normOut <- makeOut(paste0(deMethod, "_normcounts.tsv"))
+rdaOut <- makeOut(paste0(deMethod, "_analysis.RData"))
sessionOut <- makeOut("session_info.txt")
-# Initialise data for html links and images, data frame with columns Label and
+# Initialise data for html links and images, data frame with columns Label and
# Link
linkData <- data.frame(Label=character(), Link=character(),
stringsAsFactors=FALSE)
@@ -238,21 +339,13 @@
upCount <- numeric()
downCount <- numeric()
flatCount <- numeric()
-
-# Read in counts and geneanno data
-counts <- read.table(countPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
-row.names(counts) <- counts[, 1]
-counts <- counts[ , -1]
-countsRows <- nrow(counts)
-if (haveAnno) {
- geneanno <- read.table(annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
-}
################################################################################
### Data Processing
################################################################################
# Extract counts and annotation data
+print("Extracting counts")
data <- list()
data$counts <- counts
if (haveAnno) {
@@ -261,12 +354,24 @@
data$genes <- data.frame(GeneID=row.names(counts))
}
-# Filter out genes that do not have a required cpm in a required number of
-# samples
+# If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
+# samples. Default is no filtering
preFilterCount <- nrow(data$counts)
-sel <- rowSums(cpm(data$counts) > cpmReq) >= sampleReq
-data$counts <- data$counts[sel, ]
-data$genes <- data$genes[sel, ,drop = FALSE]
+
+if (filtCPM || filtSmpCount || filtTotCount) {
+
+ if (filtTotCount) {
+ keep <- rowSums(data$counts) >= opt$cntReq
+ } else if (filtSmpCount) {
+ keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
+ } else if (filtCPM) {
+ keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
+ }
+
+ data$counts <- data$counts[keep, ]
+ data$genes <- data$genes[keep, , drop=FALSE]
+}
+
postFilterCount <- nrow(data$counts)
filteredCount <- preFilterCount-postFilterCount
@@ -276,173 +381,231 @@
# Generating the DGEList object "data"
+print("Generating DGEList object")
data$samples <- sampleanno
data$samples$lib.size <- colSums(data$counts)
data$samples$norm.factors <- 1
row.names(data$samples) <- colnames(data$counts)
data <- new("DGEList", data)
+print("Generating Design")
+# Name rows of factors according to their sample
+row.names(factors) <- names(data$counts)
factorList <- sapply(names(factors), pasteListName)
-
-formula <- "~0"
+formula <- "~0"
for (i in 1:length(factorList)) {
formula <- paste(formula,factorList[i], sep="+")
}
-
formula <- formula(formula)
design <- model.matrix(formula)
-
for (i in 1:length(factorList)) {
colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE)
}
-# Calculating normalising factor, estimating dispersion
-data <- calcNormFactors(data, method=normOpt)
-#data <- estimateDisp(data, design=design, robust=TRUE)
+# Calculating normalising factors
+print("Calculating Normalisation Factors")
+data <- calcNormFactors(data, method=opt$normOpt)
# Generate contrasts information
+print("Generating Contrasts")
contrasts <- makeContrasts(contrasts=contrastData, levels=design)
-# Name rows of factors according to their sample
-row.names(factors) <- names(data$counts)
-
################################################################################
### Data Output
################################################################################
-
-# BCV Plot
-#png(bcvOutPng, width=600, height=600)
-#plotBCV(data, main="BCV Plot")
-#imageData[1, ] <- c("BCV Plot", "bcvplot.png")
-#invisible(dev.off())
-
-#pdf(bcvOutPdf)
-#plotBCV(data, main="BCV Plot")
-#invisible(dev.off())
-
-if (wantWeight) {
- # Creating voom data object and plot
- png(voomOutPng, width=1000, height=600)
- vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
- imageData[1, ] <- c("Voom Plot", "voomplot.png")
- invisible(dev.off())
-
- pdf(voomOutPdf, width=14)
- vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
- linkData[1, ] <- c("Voom Plot (.pdf)", "voomplot.pdf")
- invisible(dev.off())
-
- # Generating fit data and top table with weights
- wts <- vData$weights
- voomFit <- lmFit(vData, design, weights=wts)
-
-} else {
- # Creating voom data object and plot
- png(voomOutPng, width=600, height=600)
- vData <- voom(data, design=design, plot=TRUE)
- imageData[1, ] <- c("Voom Plot", "voomplot.png")
- invisible(dev.off())
-
- pdf(voomOutPdf)
- vData <- voom(data, design=design, plot=TRUE)
- linkData[1, ] <- c("Voom Plot (.pdf)", "voomplot.pdf")
- invisible(dev.off())
-
- # Generate voom fit
- voomFit <- lmFit(vData, design)
-
-}
-
- # Save normalised counts (log2cpm)
-if (wantNorm) {
- norm_counts <- data.frame(vData$genes, vData$E)
- write.table (norm_counts, file=normOut, row.names=FALSE, sep="\t")
- linkData <- rbind(linkData, c("limma-voom_normcounts.tsv", "limma-voom_normcounts.tsv"))
-}
-
-# Fit linear model and estimate dispersion with eBayes
-voomFit <- contrasts.fit(voomFit, contrasts)
-voomFit <- eBayes(voomFit)
-
# Plot MDS
+print("Generating MDS plot")
labels <- names(counts)
png(mdsOutPng, width=600, height=600)
# Currently only using a single factor
-plotMDS(vData, labels=labels, col=as.numeric(factors[, 1]), cex=0.8)
-imgName <- "Voom Plot"
+plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (unnormalised)")
+imageData[1, ] <- c("MDS Plot (unnormalised)", "mdsplot_nonorm.png")
+invisible(dev.off())
+
+pdf(mdsOutPdf)
+plotMDS(data, labels=labels, cex=0.5)
+linkData[1, ] <- c("MDS Plot (unnormalised).pdf", "mdsplot_nonorm.pdf")
+invisible(dev.off())
+
+if (wantTrend) {
+ # limma-trend approach
+ logCPM <- cpm(data, log=TRUE, prior.count=opt$trend)
+ fit <- lmFit(logCPM, design)
+ fit <- contrasts.fit(fit, contrasts)
+ if (wantRobust) {
+ fit <- eBayes(fit, trend=TRUE, robust=TRUE)
+ } else {
+ fit <- eBayes(fit, trend=TRUE, robust=FALSE)
+ }
+ # plot fit with plotSA
+ saOutPng <- makeOut("saplot.png")
+ saOutPdf <- makeOut("saplot.pdf")
+
+ png(saOutPng, width=600, height=600)
+ plotSA(fit, main="SA Plot")
+ imgName <- "SA Plot.png"
+ imgAddr <- "saplot.png"
+ imageData <- rbind(imageData, c(imgName, imgAddr))
+ invisible(dev.off())
+
+ pdf(saOutPdf, width=14)
+ plotSA(fit, main="SA Plot")
+ linkName <- paste0("SA Plot.pdf")
+ linkAddr <- paste0("saplot.pdf")
+ linkData <- rbind(linkData, c(linkName, linkAddr))
+ invisible(dev.off())
+
+ plotData <- logCPM
+
+ # Save normalised counts (log2cpm)
+ if (wantNorm) {
+ write.table(logCPM, file=normOut, row.names=TRUE, sep="\t")
+ linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv"))))
+ }
+} else {
+ # limma-voom approach
+ voomOutPdf <- makeOut("voomplot.pdf")
+ voomOutPng <- makeOut("voomplot.png")
+
+ if (wantWeight) {
+ # Creating voom data object and plot
+ png(voomOutPng, width=1000, height=600)
+ vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
+ imgName <- "Voom Plot.png"
+ imgAddr <- "voomplot.png"
+ imageData <- rbind(imageData, c(imgName, imgAddr))
+ invisible(dev.off())
+
+ pdf(voomOutPdf, width=14)
+ vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
+ linkName <- paste0("Voom Plot.pdf")
+ linkAddr <- paste0("voomplot.pdf")
+ linkData <- rbind(linkData, c(linkName, linkAddr))
+ invisible(dev.off())
+
+ # Generating fit data and top table with weights
+ wts <- vData$weights
+ voomFit <- lmFit(vData, design, weights=wts)
+
+ } else {
+ # Creating voom data object and plot
+ png(voomOutPng, width=600, height=600)
+ vData <- voom(data, design=design, plot=TRUE)
+ imgName <- "Voom Plot"
+ imgAddr <- "voomplot.png"
+ imageData <- rbind(imageData, c(imgName, imgAddr))
+ invisible(dev.off())
+
+ pdf(voomOutPdf)
+ vData <- voom(data, design=design, plot=TRUE)
+ linkName <- paste0("Voom Plot.pdf")
+ linkAddr <- paste0("voomplot.pdf")
+ linkData <- rbind(linkData, c(linkName, linkAddr))
+ invisible(dev.off())
+
+ # Generate voom fit
+ voomFit <- lmFit(vData, design)
+ }
+
+ # Save normalised counts (log2cpm)
+ if (wantNorm) {
+ norm_counts <- data.frame(vData$genes, vData$E)
+ write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t")
+ linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv"))))
+ }
+
+ # Fit linear model and estimate dispersion with eBayes
+ voomFit <- contrasts.fit(voomFit, contrasts)
+ if (wantRobust) {
+ fit <- eBayes(voomFit, robust=TRUE)
+ } else {
+ fit <- eBayes(voomFit, robust=FALSE)
+ }
+ plotData <- vData
+}
+
+print("Generating normalised MDS plot")
+png(nmdsOutPng, width=600, height=600)
+# Currently only using a single factor
+plotMDS(plotData, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (normalised)")
+imgName <- "MDS Plot (normalised)"
imgAddr <- "mdsplot.png"
imageData <- rbind(imageData, c(imgName, imgAddr))
invisible(dev.off())
-pdf(mdsOutPdf)
-plotMDS(vData, labels=labels, cex=0.5)
-linkName <- paste0("MDS Plot (.pdf)")
+pdf(nmdsOutPdf)
+plotMDS(plotData, labels=labels, cex=0.5)
+linkName <- paste0("MDS Plot (normalised).pdf")
linkAddr <- paste0("mdsplot.pdf")
linkData <- rbind(linkData, c(linkName, linkAddr))
invisible(dev.off())
+print("Generating DE results")
+status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
+ lfc=opt$lfcReq)
+sumStatus <- summary(status)
+
for (i in 1:length(contrastData)) {
+ # Collect counts for differential expression
+ upCount[i] <- sumStatus["Up", i]
+ downCount[i] <- sumStatus["Down", i]
+ flatCount[i] <- sumStatus["NotSig", i]
+
+ # Write top expressions table
+ top <- topTable(fit, coef=i, number=Inf, sort.by="P")
+ if (wantTrend) {
+ write.table(top, file=topOut[i], row.names=TRUE, sep="\t")
+ } else {
+ write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
+ }
+
+ linkName <- paste0(deMethod, "_", contrastData[i], ".tsv")
+ linkAddr <- paste0(deMethod, "_", contrastData[i], ".tsv")
+ linkData <- rbind(linkData, c(linkName, linkAddr))
- status = decideTests(voomFit[, i], adjust.method=pAdjOpt, p.value=pValReq,
- lfc=lfcReq)
-
- sumStatus <- summary(status)
-
- # Collect counts for differential expression
- upCount[i] <- sumStatus["1",]
- downCount[i] <- sumStatus["-1",]
- flatCount[i] <- sumStatus["0",]
-
- # Write top expressions table
- top <- topTable(voomFit, coef=i, number=Inf, sort.by="P")
- write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
-
- linkName <- paste0("limma-voom_", contrastData[i], ".tsv")
- linkAddr <- paste0("limma-voom_", contrastData[i], ".tsv")
- linkData <- rbind(linkData, c(linkName, linkAddr))
-
- # Plot MA (log ratios vs mean average) using limma package on weighted
- pdf(maOutPdf[i])
- limma::plotMA(voomFit, status=status, coef=i,
- main=paste("MA Plot:", unmake.names(contrastData[i])),
- col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
- xlab="Average Expression", ylab="logFC")
-
- abline(h=0, col="grey", lty=2)
-
- linkName <- paste0("MA Plot_", contrastData[i], " (.pdf)")
- linkAddr <- paste0("maplot_", contrastData[i], ".pdf")
- linkData <- rbind(linkData, c(linkName, linkAddr))
- invisible(dev.off())
-
- png(maOutPng[i], height=600, width=600)
- limma::plotMA(voomFit, status=status, coef=i,
- main=paste("MA Plot:", unmake.names(contrastData[i])),
- col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
- xlab="Average Expression", ylab="logFC")
-
- abline(h=0, col="grey", lty=2)
-
- imgName <- paste0("MA Plot_", contrastData[i])
- imgAddr <- paste0("maplot_", contrastData[i], ".png")
- imageData <- rbind(imageData, c(imgName, imgAddr))
- invisible(dev.off())
+ # Plot MA (log ratios vs mean average) using limma package on weighted
+ pdf(maOutPdf[i])
+ limma::plotMD(fit, status=status, coef=i,
+ main=paste("MA Plot:", unmake.names(contrastData[i])),
+ col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+ xlab="Average Expression", ylab="logFC")
+
+ abline(h=0, col="grey", lty=2)
+
+ linkName <- paste0("MA Plot_", contrastData[i], " (.pdf)")
+ linkAddr <- paste0("maplot_", contrastData[i], ".pdf")
+ linkData <- rbind(linkData, c(linkName, linkAddr))
+ invisible(dev.off())
+
+ png(maOutPng[i], height=600, width=600)
+ limma::plotMD(fit, status=status, coef=i,
+ main=paste("MA Plot:", unmake.names(contrastData[i])),
+ col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+ xlab="Average Expression", ylab="logFC")
+
+ abline(h=0, col="grey", lty=2)
+
+ imgName <- paste0("MA Plot_", contrastData[i])
+ imgAddr <- paste0("maplot_", contrastData[i], ".png")
+ imageData <- rbind(imageData, c(imgName, imgAddr))
+ invisible(dev.off())
}
sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
row.names(sigDiff) <- contrastData
# Save relevant items as rda object
if (wantRda) {
- if (wantWeight) {
- save(data, status, vData, labels, factors, wts, voomFit, top, contrasts,
- design,
- file=rdaOut, ascii=TRUE)
- } else {
- save(data, status, vData, labels, factors, voomFit, top, contrasts, design,
- file=rdaOut, ascii=TRUE)
- }
- linkData <- rbind(linkData, c("RData (.rda)", "RData.rda"))
+ print("Saving RData")
+ if (wantWeight) {
+ save(data, status, plotData, labels, factors, wts, fit, top, contrasts,
+ design,
+ file=rdaOut, ascii=TRUE)
+ } else {
+ save(data, status, plotData, labels, factors, fit, top, contrasts, design,
+ file=rdaOut, ascii=TRUE)
+ }
+ linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData"))))
}
# Record session info
@@ -458,21 +621,21 @@
################################################################################
# Clear file
-cat("", file=htmlPath)
+cat("", file=opt$htmlPath)
cata("\n")
cata("\n")
-cata("Limma-voom Analysis Output: \n")
+cata("Limma Analysis Output: \n")
cata("PDF copies of JPEGS available in 'Plots' section. \n")
if (wantWeight) {
- HtmlImage(imageData$Link[1], imageData$Label[1], width=1000)
+ HtmlImage(imageData$Link[1], imageData$Label[1], width=1000)
} else {
- HtmlImage(imageData$Link[1], imageData$Label[1])
+ HtmlImage(imageData$Link[1], imageData$Label[1])
}
for (i in 2:nrow(imageData)) {
- HtmlImage(imageData$Link[i], imageData$Label[i])
+ HtmlImage(imageData$Link[i], imageData$Label[i])
}
cata("Differential Expression Counts: \n")
@@ -481,40 +644,40 @@
cata("\n")
TableItem()
for (i in colnames(sigDiff)) {
- TableHeadItem(i)
+ TableHeadItem(i)
}
cata(" \n")
for (i in 1:nrow(sigDiff)) {
- cata("\n")
- TableHeadItem(unmake.names(row.names(sigDiff)[i]))
- for (j in 1:ncol(sigDiff)) {
- TableItem(as.character(sigDiff[i, j]))
- }
- cata(" \n")
+ cata("\n")
+ TableHeadItem(unmake.names(row.names(sigDiff)[i]))
+ for (j in 1:ncol(sigDiff)) {
+ TableItem(as.character(sigDiff[i, j]))
+ }
+ cata(" \n")
}
cata("")
cata("Plots: \n")
for (i in 1:nrow(linkData)) {
- if (grepl(".pdf", linkData$Link[i])) {
- HtmlLink(linkData$Link[i], linkData$Label[i])
+ if (grepl(".pdf", linkData$Link[i])) {
+ HtmlLink(linkData$Link[i], linkData$Label[i])
}
}
cata("Tables: \n")
for (i in 1:nrow(linkData)) {
- if (grepl(".tsv", linkData$Link[i])) {
- HtmlLink(linkData$Link[i], linkData$Label[i])
- }
+ if (grepl(".tsv", linkData$Link[i])) {
+ HtmlLink(linkData$Link[i], linkData$Label[i])
+ }
}
if (wantRda) {
- cata("R Data Object: \n")
- for (i in 1:nrow(linkData)) {
- if (grepl(".rda", linkData$Link[i])) {
- HtmlLink(linkData$Link[i], linkData$Label[i])
+ cata("R Data Object: \n")
+ for (i in 1:nrow(linkData)) {
+ if (grepl(".RData", linkData$Link[i])) {
+ HtmlLink(linkData$Link[i], linkData$Label[i])
+ }
}
- }
}
cata("Alt-click links to download file.
\n")
@@ -524,40 +687,60 @@
cata("Additional Information \n")
cata("\n")
-if (cpmReq!=0 && sampleReq!=0) {
- tempStr <- paste("Genes without more than", cpmReq,
- "CPM in at least", sampleReq, "samples are insignificant",
- "and filtered out.")
- ListItem(tempStr)
- filterProp <- round(filteredCount/preFilterCount*100, digits=2)
- tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
- "%) genes were filtered out for low expression.")
- ListItem(tempStr)
-}
-ListItem(normOpt, " was the method used to normalise library sizes.")
-if (wantWeight) {
- ListItem("Weights were applied to samples.")
-} else {
- ListItem("Weights were not applied to samples.")
+
+if (filtCPM || filtSmpCount || filtTotCount) {
+ if (filtCPM) {
+ tempStr <- paste("Genes without more than", opt$cmpReq,
+ "CPM in at least", opt$sampleReq, "samples are insignificant",
+ "and filtered out.")
+ } else if (filtSmpCount) {
+ tempStr <- paste("Genes without more than", opt$cntReq,
+ "counts in at least", opt$sampleReq, "samples are insignificant",
+ "and filtered out.")
+ } else if (filtTotCount) {
+ tempStr <- paste("Genes without more than", opt$cntReq,
+ "counts, after summing counts for all samples, are insignificant",
+ "and filtered out.")
+ }
+
+ ListItem(tempStr)
+ filterProp <- round(filteredCount/preFilterCount*100, digits=2)
+ tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
+ "%) genes were filtered out for low expression.")
+ ListItem(tempStr)
}
-if (pAdjOpt!="none") {
- if (pAdjOpt=="BH" || pAdjOpt=="BY") {
- tempStr <- paste0("MA-Plot highlighted genes are significant at FDR ",
- "of ", pValReq," and exhibit log2-fold-change of at ",
- "least ", lfcReq, ".")
- ListItem(tempStr)
- } else if (pAdjOpt=="holm") {
- tempStr <- paste0("MA-Plot highlighted genes are significant at adjusted ",
- "p-value of ", pValReq," by the Holm(1979) ",
- "method, and exhibit log2-fold-change of at least ",
- lfcReq, ".")
- ListItem(tempStr)
- }
+ListItem(opt$normOpt, " was the method used to normalise library sizes.")
+if (wantTrend) {
+ ListItem("The limma-trend method was used.")
+} else {
+ ListItem("The limma-voom method was used.")
+}
+if (wantWeight) {
+ ListItem("Weights were applied to samples.")
} else {
- tempStr <- paste0("MA-Plot highlighted genes are significant at p-value ",
- "of ", pValReq," and exhibit log2-fold-change of at ",
- "least ", lfcReq, ".")
- ListItem(tempStr)
+ ListItem("Weights were not applied to samples.")
+}
+if (wantRobust) {
+ ListItem("eBayes was used with robust settings (robust=TRUE).")
+}
+if (opt$pAdjOpt!="none") {
+ if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") {
+ tempStr <- paste0("MA-Plot highlighted genes are significant at FDR ",
+ "of ", opt$pValReq," and exhibit log2-fold-change of at ",
+ "least ", opt$lfcReq, ".")
+ ListItem(tempStr)
+ } else if (opt$pAdjOpt=="holm") {
+ tempStr <- paste0("MA-Plot highlighted genes are significant at adjusted ",
+ "p-value of ", opt$pValReq," by the Holm(1979) ",
+ "method, and exhibit log2-fold-change of at least ",
+ opt$lfcReq, ".")
+ ListItem(tempStr)
+ }
+ } else {
+ tempStr <- paste0("MA-Plot highlighted genes are significant at p-value ",
+ "of ", opt$pValReq," and exhibit log2-fold-change of at ",
+ "least ", opt$lfcReq, ".")
+ ListItem(tempStr)
}
cata(" \n")
@@ -570,19 +753,18 @@
TableHeadItem("SampleID")
TableHeadItem(names(factors)[1]," (Primary Factor)")
- if (ncol(factors) > 1) {
-
+if (ncol(factors) > 1) {
for (i in names(factors)[2:length(names(factors))]) {
- TableHeadItem(i)
+ TableHeadItem(i)
}
cata("\n")
- }
+}
for (i in 1:nrow(factors)) {
- cata("\n")
- TableHeadItem(row.names(factors)[i])
- for (j in 1:ncol(factors)) {
- TableItem(as.character(unmake.names(factors[i, j])))
+ cata(" \n")
+ TableHeadItem(row.names(factors)[i])
+ for (j in 1:ncol(factors)) {
+ TableItem(as.character(unmake.names(factors[i, j])))
}
cata(" \n")
}
@@ -638,7 +820,7 @@
cit[10] <- paste("Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:",
"precision weights unlock linear model analysis tools for",
"RNA-seq read counts. Genome Biology 15, R29.")
-cit[11] <- paste("Ritchie ME, Diyagama D, Neilson J, van Laar R,",
+cit[11] <- paste("Ritchie ME, Diyagama D, Neilson J, van Laar R,",
"Dobrovic A, Holloway A and Smyth GK (2006).",
"Empirical array quality weights for microarray data.",
"BMC Bioinformatics 7, Article 261.")
@@ -667,9 +849,9 @@
cata("Please report problems or suggestions to: su.s@wehi.edu.au
\n")
for (i in 1:nrow(linkData)) {
- if (grepl("session_info", linkData$Link[i])) {
- HtmlLink(linkData$Link[i], linkData$Label[i])
- }
+ if (grepl("session_info", linkData$Link[i])) {
+ HtmlLink(linkData$Link[i], linkData$Label[i])
+ }
}
cata("\n")
diff -r a330ddf43861 -r 38aab66ae5cb limma_voom.xml
--- a/limma_voom.xml Thu Sep 07 05:27:27 2017 -0400
+++ b/limma_voom.xml Wed Jan 31 12:45:42 2018 -0500
@@ -1,65 +1,100 @@
-
+
- Differential expression with optional sample weights
+ Perform differential expression with limma-voom or limma-trend
- bioconductor-edger
- bioconductor-limma
- r-statmod
- r-scales
+ bioconductor-limma
+ bioconductor-edger
+ r-statmod
+ r-scales
+ r-rjson
+ r-getopt
/dev/null | grep -v -i "WARNING: ")", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+echo $(R --version | grep version | grep -v GNU)", limma version" $(R --vanilla --slave -e "library(limma); cat(sessionInfo()\$otherPkgs\$limma\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", statmod version" $(R --vanilla --slave -e "library(statmod); cat(sessionInfo()\$otherPkgs\$statmod\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scales version" $(R --vanilla --slave -e "library(scales); cat(sessionInfo()\$otherPkgs\$scales\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
]]>
-
+
+
+
+
+ limma-voom
+ limma-trend
+
+
+
+
+
+
+
+
+
+
+
+
+ Separate Count Files
+ Single Count Matrix
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ No
+ Yes
+
+
+
+
+
+
+
+
+ ^[\w]+$
+
+
+
+ ^[\w,]+$
+
+
+
+
+
+
+
+
+ help="If you provide an annotation file, annotations will be added to the table(s) of differential expression results to provide descriptions for each gene. See Help section below.">
No
Yes
@@ -84,102 +191,88 @@
-
-
- No
- Yes
-
-
-
-
-
-
-
- ^[\w]+$
-
-
-
- ^[\w,]+$
-
-
-
-
- ^[\w]+$
-
-
-
- ^[\w,]+$
-
-
-
-
-
-
-
- ^[\w,-]+$
-
-
-
-
- Yes
- No
+
+
+
+
+ ^[\w-]+$
-
-
-
-
-
-
-
-
-
-
-
- TMM
- RLE
- Upperquartile
- None (Don't normalise)
-
-
-
-
+
-
-
+
+
+
+
+ No
+ Yes
+
+
+
+
+ CPM
+ Counts
+
+
+
+
+
+
+
+
+
+ Total
+ Sample
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
- No
- Yes
+
+
+
-
-
- Benjamini and Hochberg (1995)
- Benjamini and Yekutieli (2001)
- Holm (1979)
- None
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+ Benjamini and Hochberg (1995)
+ Benjamini and Yekutieli (2001)
+ Holm (1979)
+ None
+
+
+ TMM
+ RLE
+ Upperquartile
+ None (Don't normalise)
+
+
+
@@ -191,11 +284,20 @@
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
@@ -203,29 +305,41 @@
-
+
+
+
-
-
-
+
+
+
+
+
+
+
-
-
+
+
+
-
+
+
-
-
-
+
+
+
+
+
+
+
@@ -233,42 +347,125 @@
+
+
-
-
-
-
-
+
+
+
-
+
+
+
+
+
+
+
-
-
+
+
+
+
-
+
+
+
-
-
+
+
+
-
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1186/gb-2014-15-2-r29
10.1093/nar/gkv412
-
+
\ No newline at end of file
diff -r a330ddf43861 -r 38aab66ae5cb test-data/Mut1.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Mut1.counts Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+GeneID Mut1
+11287 1463
+11298 1345
+11302 5
+11303 1574
+11304 361
+11305 1762
diff -r a330ddf43861 -r 38aab66ae5cb test-data/Mut2.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Mut2.counts Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+GeneID Mut2
+11287 1441
+11298 1291
+11302 6
+11303 1519
+11304 397
+11305 1942
diff -r a330ddf43861 -r 38aab66ae5cb test-data/Mut3.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Mut3.counts Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+GeneID Mut3
+11287 1495
+11298 1346
+11302 5
+11303 1654
+11304 346
+11305 2027
diff -r a330ddf43861 -r 38aab66ae5cb test-data/WT1.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/WT1.counts Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+GeneID WT1
+11287 1699
+11298 1905
+11302 6
+11303 2099
+11304 356
+11305 2528
diff -r a330ddf43861 -r 38aab66ae5cb test-data/WT2.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/WT2.counts Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+GeneID WT2
+11287 1528
+11298 1744
+11302 8
+11303 1974
+11304 312
+11305 2438
diff -r a330ddf43861 -r 38aab66ae5cb test-data/WT3.counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/WT3.counts Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+GeneID WT3
+11287 1601
+11298 1834
+11302 7
+11303 2100
+11304 337
+11305 2493
diff -r a330ddf43861 -r 38aab66ae5cb test-data/factorinfo.txt
--- a/test-data/factorinfo.txt Thu Sep 07 05:27:27 2017 -0400
+++ b/test-data/factorinfo.txt Wed Jan 31 12:45:42 2018 -0500
@@ -1,7 +1,7 @@
Sample Genotype Batch
+Mut1 Mut b1
+Mut2 Mut b2
+Mut3 Mut b3
WT1 WT b1
WT2 WT b2
WT3 WT b3
-Mut1 Mut b1
-Mut2 Mut b2
-Mut3 Mut b3
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-trend_Mut-WT.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/limma-trend_Mut-WT.tsv Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+"logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B"
+"11304" 0.454006866294872 15.5347733798565 5.73783569533779 6.51380077190898e-06 3.90828046314539e-05 9.50615253604686
+"11287" 0.189211879829905 17.6560194022488 4.52571052740792 0.000138730279836171 0.000416190839508512 3.37443017677096
+"11298" -0.137906115120717 17.6760627799956 -3.28370943058883 0.00313397342215802 0.00626794684431604 -1.40612492217495
+"11303" -0.0565048762006306 17.887811868748 -1.22181580605621 0.233642901495507 0.35046435224326 -5.9848993971045
+"11305" -0.0597744562444475 18.1592280697576 -1.04836609029276 0.304913314924737 0.365895977909685 -6.17897539114193
+"11302" -0.0455235218343741 10.2501491061291 -0.373618198821297 0.711968519770644 0.711968519770644 -6.65188039014484
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-voom_Mut-WT.tsv
--- a/test-data/limma-voom_Mut-WT.tsv Thu Sep 07 05:27:27 2017 -0400
+++ b/test-data/limma-voom_Mut-WT.tsv Wed Jan 31 12:45:42 2018 -0500
@@ -1,7 +1,7 @@
"GeneID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B"
-"11304" 0.457332061341026 15.5254133001226 6.50459574633681 9.98720685006039e-07 5.99232411003624e-06 14.0741948485896
-"11287" 0.190749727701785 17.6546448244617 5.09535410066402 3.26518807654125e-05 9.79556422962375e-05 5.46773893802392
-"11298" -0.138014418336201 17.6747285193431 -3.33168485842331 0.00278753263633162 0.00557506527266324 -1.84301342041449
-"11303" -0.0558958943606989 17.886791401216 -1.30108531275576 0.205582481502297 0.254491025872973 -6.4924124057801
-"11305" -0.0606991650996633 18.1585474109909 -1.28203791127299 0.212075854894144 0.254491025872973 -6.42090197700503
-"11302" -0.0350239682204432 9.78883119065989 -0.236945963165269 0.814709535394087 0.814709535394087 -6.09497670655944
+"11304" 0.457332061341022 15.5254133001226 6.50459574633635 9.98720685007144e-07 5.99232411004286e-06 14.0741948485867
+"11287" 0.190749727701785 17.6546448244617 5.09535410066427 3.26518807653921e-05 9.79556422961764e-05 5.46773893802514
+"11298" -0.138014418336201 17.6747285193431 -3.3316848584234 0.00278753263633103 0.00557506527266206 -1.84301342041422
+"11303" -0.0558958943607024 17.886791401216 -1.30108531275582 0.205582481502277 0.254491025872966 -6.49241240578
+"11305" -0.0606991650996669 18.1585474109909 -1.28203791127301 0.212075854894138 0.254491025872966 -6.42090197700496
+"11302" -0.035023968220445 9.78883119065989 -0.236945963165271 0.814709535394086 0.814709535394086 -6.09497670655939
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-voom_Mut-WT_2fact.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/limma-voom_Mut-WT_2fact.tsv Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+"GeneID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B"
+"11287" 0.190521683937302 17.6546448244617 14.4538942795641 5.93483727833391e-09 3.56090236700035e-08 96.5864491179047
+"11298" -0.13987741320075 17.6747285193431 -7.7190169464239 5.41139783114199e-06 1.6234193493426e-05 22.3495333961476
+"11304" 0.459011254726059 15.5254133001226 5.6408319840902 0.00010884906092378 0.000217698121847559 8.76657226635715
+"11303" -0.0641599901594248 17.886791401216 -2.9900818153928 0.0112725655419901 0.0169088483129851 -2.70089035288878
+"11305" -0.0651479753016169 18.1585474109909 -2.28935282063866 0.0409794711446066 0.0491753653735279 -4.2713352660451
+"11302" -0.0358817134644287 9.78883119065989 -0.439436789626843 0.668154169055813 0.668154169055813 -5.75838315483931
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-voom_Mut-WT_2fact_anno.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/limma-voom_Mut-WT_2fact_anno.tsv Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B"
+11287 "Pzp" "pregnancy zone protein" 6 4681 0.190521683937302 17.6546448244617 15.1152647604854 3.5605743515492e-09 2.13634461092952e-08 106.319792304784
+11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 -0.139877413200757 17.6747285193431 -8.49164953574146 2.03124167559177e-06 6.0937250267753e-06 28.5167891813121
+11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 0.459011254726057 15.5254133001226 5.60316118539212 0.000115556485215252 0.000231112970430504 8.56249981126828
+11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 -0.0641599901594141 17.886791401216 -2.92798536696768 0.0126512435710263 0.0189768653565394 -2.86386681629442
+11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 -0.0651479753016169 18.1585474109909 -2.03911882030581 0.0640903140266962 0.0769083768320354 -4.69855556350315
+11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 -0.0358817134644287 9.78883119065989 -0.43940476739557 0.668176726641997 0.668176726641997 -5.75841999159995
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-voom_Mut-WT_anno.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/limma-voom_Mut-WT_anno.tsv Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B"
+11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 0.457332061341022 15.5254133001226 6.50459574633635 9.98720685007144e-07 5.99232411004286e-06 14.0741948485867
+11287 "Pzp" "pregnancy zone protein" 6 4681 0.190749727701785 17.6546448244617 5.09535410066427 3.26518807653921e-05 9.79556422961764e-05 5.46773893802514
+11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 -0.138014418336201 17.6747285193431 -3.3316848584234 0.00278753263633103 0.00557506527266206 -1.84301342041422
+11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 -0.0558958943607024 17.886791401216 -1.30108531275582 0.205582481502277 0.254491025872966 -6.49241240578
+11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 -0.0606991650996669 18.1585474109909 -1.28203791127301 0.212075854894138 0.254491025872966 -6.42090197700496
+11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 -0.035023968220445 9.78883119065989 -0.236945963165271 0.814709535394086 0.814709535394086 -6.09497670655939
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-voom_Mut-WTanno.tsv
--- a/test-data/limma-voom_Mut-WTanno.tsv Thu Sep 07 05:27:27 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B"
-11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 0.457332061341026 15.5254133001226 6.50459574633681 9.98720685006039e-07 5.99232411003624e-06 14.0741948485896
-11287 "Pzp" "pregnancy zone protein" 6 4681 0.190749727701785 17.6546448244617 5.09535410066402 3.26518807654125e-05 9.79556422962375e-05 5.46773893802392
-11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 -0.138014418336201 17.6747285193431 -3.33168485842331 0.00278753263633162 0.00557506527266324 -1.84301342041449
-11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 -0.0558958943606989 17.886791401216 -1.30108531275576 0.205582481502297 0.254491025872973 -6.4924124057801
-11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 -0.0606991650996633 18.1585474109909 -1.28203791127299 0.212075854894144 0.254491025872973 -6.42090197700503
-11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 -0.0350239682204432 9.78883119065989 -0.236945963165269 0.814709535394087 0.814709535394087 -6.09497670655944
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-voom_Mut-WTmultifact.tsv
--- a/test-data/limma-voom_Mut-WTmultifact.tsv Thu Sep 07 05:27:27 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-"GeneID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B"
-"11287" 0.190521683937291 17.6546448244617 14.4538942795625 5.93483727834149e-09 3.56090236700489e-08 96.5864491178815
-"11298" -0.139877413200757 17.6747285193431 -7.71901694642401 5.4113978311412e-06 1.62341934934236e-05 22.3495333961486
-"11304" 0.459011254726061 15.5254133001226 5.64083198409048 0.000108849060923731 0.000217698121847462 8.76657226635859
-"11303" -0.0641599901594212 17.886791401216 -2.99008181539252 0.0112725655419959 0.0169088483129939 -2.70089035288952
-"11305" -0.0651479753016204 18.1585474109909 -2.28935282063873 0.0409794711446019 0.0491753653735222 -4.27133526604488
-"11302" -0.035881713464434 9.78883119065989 -0.439436789626921 0.668154169055758 0.668154169055758 -5.75838315483926
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-voom_WT-Mut.tsv
--- a/test-data/limma-voom_WT-Mut.tsv Thu Sep 07 05:27:27 2017 -0400
+++ b/test-data/limma-voom_WT-Mut.tsv Wed Jan 31 12:45:42 2018 -0500
@@ -1,7 +1,7 @@
"GeneID" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B"
-"11304" -0.457332061341026 15.5254133001226 -6.50459574633681 9.98720685006039e-07 5.99232411003624e-06 14.0741948485896
-"11287" -0.190749727701785 17.6546448244617 -5.09535410066402 3.26518807654125e-05 9.79556422962375e-05 5.46773893802392
-"11298" 0.138014418336201 17.6747285193431 3.33168485842331 0.00278753263633162 0.00557506527266324 -1.84301342041449
-"11303" 0.0558958943606989 17.886791401216 1.30108531275576 0.205582481502297 0.254491025872973 -6.4924124057801
-"11305" 0.0606991650996633 18.1585474109909 1.28203791127299 0.212075854894144 0.254491025872973 -6.42090197700503
-"11302" 0.0350239682204432 9.78883119065989 0.236945963165269 0.814709535394087 0.814709535394087 -6.09497670655944
+"11304" -0.457332061341022 15.5254133001226 -6.50459574633635 9.98720685007144e-07 5.99232411004286e-06 14.0741948485867
+"11287" -0.190749727701785 17.6546448244617 -5.09535410066427 3.26518807653921e-05 9.79556422961764e-05 5.46773893802514
+"11298" 0.138014418336201 17.6747285193431 3.3316848584234 0.00278753263633103 0.00557506527266206 -1.84301342041422
+"11303" 0.0558958943607024 17.886791401216 1.30108531275582 0.205582481502277 0.254491025872966 -6.49241240578
+"11305" 0.0606991650996669 18.1585474109909 1.28203791127301 0.212075854894138 0.254491025872966 -6.42090197700496
+"11302" 0.035023968220445 9.78883119065989 0.236945963165271 0.814709535394086 0.814709535394086 -6.09497670655939
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-voom_WT-Mut_2fact_anno.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/limma-voom_WT-Mut_2fact_anno.tsv Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "AveExpr" "t" "P.Value" "adj.P.Val" "B"
+11287 "Pzp" "pregnancy zone protein" 6 4681 -0.190521683937302 17.6546448244617 -15.1152647604854 3.5605743515492e-09 2.13634461092952e-08 106.319792304784
+11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 0.139877413200757 17.6747285193431 8.49164953574146 2.03124167559177e-06 6.0937250267753e-06 28.5167891813121
+11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 -0.459011254726057 15.5254133001226 -5.60316118539212 0.000115556485215252 0.000231112970430504 8.56249981126828
+11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 0.0641599901594141 17.886791401216 2.92798536696768 0.0126512435710263 0.0189768653565394 -2.86386681629442
+11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 0.0651479753016169 18.1585474109909 2.03911882030581 0.0640903140266962 0.0769083768320354 -4.69855556350315
+11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 0.0358817134644287 9.78883119065989 0.43940476739557 0.668176726641997 0.668176726641997 -5.75841999159995
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-voom_normcounts.tsv
--- a/test-data/limma-voom_normcounts.tsv Thu Sep 07 05:27:27 2017 -0400
+++ b/test-data/limma-voom_normcounts.tsv Wed Jan 31 12:45:42 2018 -0500
@@ -1,7 +1,7 @@
-"GeneID" "WT1" "WT2" "WT3" "Mut1" "Mut2" "Mut3"
-"11287" 17.6076545522668 17.5079905058358 17.5639197719462 17.7719335903598 17.7105244453357 17.7658460810258
-"11298" 17.7727137985373 17.6986875511432 17.7598828784201 17.6506532355915 17.5520012519588 17.6144324004081
-"11302" 9.57719962386619 10.0175525101992 9.82560228478005 9.71615817858834 9.91760904198188 9.67886550454371
-"11303" 17.9125899785601 17.8773613214092 17.955228759658 17.8774046020864 17.7865502833631 17.911613462219
-"11304" 15.354518172169 15.2178020484983 15.3174553811097 15.7545783969022 15.8519803740125 15.6561454280436
-"11305" 18.1808259688524 18.1818679259847 18.2026681768366 18.0401341021246 18.1408682071359 18.204920085011
+"GeneID" "Mut1" "Mut2" "Mut3" "WT1" "WT2" "WT3"
+"11287" 17.7719335903598 17.7105244453357 17.7658460810258 17.6076545522668 17.5079905058358 17.5639197719462
+"11298" 17.6506532355915 17.5520012519588 17.6144324004081 17.7727137985373 17.6986875511432 17.7598828784201
+"11302" 9.71615817858834 9.91760904198188 9.67886550454371 9.57719962386619 10.0175525101992 9.82560228478005
+"11303" 17.8774046020864 17.7865502833631 17.911613462219 17.9125899785601 17.8773613214092 17.955228759658
+"11304" 15.7545783969022 15.8519803740125 15.6561454280436 15.354518172169 15.2178020484983 15.3174553811097
+"11305" 18.0401341021246 18.1408682071359 18.204920085011 18.1808259688524 18.1818679259847 18.2026681768366
diff -r a330ddf43861 -r 38aab66ae5cb test-data/limma-voom_normcounts_anno.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/limma-voom_normcounts_anno.tsv Wed Jan 31 12:45:42 2018 -0500
@@ -0,0 +1,7 @@
+"EntrezID" "Symbol" "GeneName" "Chr" "Length" "Mut1" "Mut2" "Mut3" "WT1" "WT2" "WT3"
+11287 "Pzp" "pregnancy zone protein" 6 4681 17.7719335903598 17.7105244453357 17.7658460810258 17.6076545522668 17.5079905058358 17.5639197719462
+11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 17.6506532355915 17.5520012519588 17.6144324004081 17.7727137985373 17.6986875511432 17.7598828784201
+11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 9.71615817858834 9.91760904198188 9.67886550454371 9.57719962386619 10.0175525101992 9.82560228478005
+11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 17.8774046020864 17.7865502833631 17.911613462219 17.9125899785601 17.8773613214092 17.955228759658
+11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 15.7545783969022 15.8519803740125 15.6561454280436 15.354518172169 15.2178020484983 15.3174553811097
+11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 18.0401341021246 18.1408682071359 18.204920085011 18.1808259688524 18.1818679259847 18.2026681768366
diff -r a330ddf43861 -r 38aab66ae5cb test-data/matrix.txt
--- a/test-data/matrix.txt Thu Sep 07 05:27:27 2017 -0400
+++ b/test-data/matrix.txt Wed Jan 31 12:45:42 2018 -0500
@@ -1,7 +1,7 @@
-GeneID WT1 WT2 WT3 Mut1 Mut2 Mut3
-11287 1699 1528 1601 1463 1441 1495
-11298 1905 1744 1834 1345 1291 1346
-11302 6 8 7 5 6 5
-11303 2099 1974 2100 1574 1519 1654
-11304 356 312 337 361 397 346
-11305 2528 2438 2493 1762 1942 2027
+GeneID Mut1 Mut2 Mut3 WT1 WT2 WT3
+11287 1463 1441 1495 1699 1528 1601
+11298 1345 1291 1346 1905 1744 1834
+11302 5 6 5 6 8 7
+11303 1574 1519 1654 2099 1974 2100
+11304 361 397 346 356 312 337
+11305 1762 1942 2027 2528 2438 2493