# HG changeset patch # User iuc # Date 1510060694 18000 # Node ID 9bdff28ae1b1c8e2fb07aab7163f0c7762ff24c3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit eac022c9c6e51e661c1513306b9fefdad673487d diff -r 000000000000 -r 9bdff28ae1b1 edger.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/edger.R Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,718 @@ +# This tool takes in a matrix of feature counts as well as gene annotations and +# outputs a table of top expressions as well as various plots for differential +# expression analysis +# +# ARGS: htmlPath", "R", 1, "character" -Path to html file linking to other outputs +# outPath", "o", 1, "character" -Path to folder to write all output to +# filesPath", "j", 2, "character" -JSON list object if multiple files input +# matrixPath", "m", 2, "character" -Path to count matrix +# factFile", "f", 2, "character" -Path to factor information file +# factInput", "i", 2, "character" -String containing factors if manually input +# annoPath", "a", 2, "character" -Path to input containing gene annotations +# contrastData", "C", 1, "character" -String containing contrasts of interest +# cpmReq", "c", 2, "double" -Float specifying cpm requirement +# cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement +# sampleReq", "s", 2, "integer" -Integer specifying cpm requirement +# normCounts", "x", 0, "logical" -String specifying if normalised counts should be output +# rdaOpt", "r", 0, "logical" -String specifying if RData should be output +# lfcReq", "l", 1, "double" -Float specifying the log-fold-change requirement +# pValReq", "p", 1, "double" -Float specifying the p-value requirement +# pAdjOpt", "d", 1, "character" -String specifying the p-value adjustment method +# normOpt", "n", 1, "character" -String specifying type of normalisation used +# robOpt", "b", 0, "logical" -String specifying if robust options should be used +# lrtOpt", "t", 0, "logical" -String specifying whether to perform LRT test instead +# +# OUT: +# MDS Plot +# BCV Plot +# QL Plot +# MD Plot +# Expression Table +# HTML file linking to the ouputs +# Optional: +# Normalised counts Table +# RData file +# +# Author: Shian Su - registertonysu@gmail.com - Jan 2014 +# Modified by: Maria Doyle - Oct 2017 (some code taken from the DESeq2 wrapper) + +# Record starting time +timeStart <- as.character(Sys.time()) + +# setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +# Load all required libraries +library(methods, quietly=TRUE, warn.conflicts=FALSE) +library(statmod, quietly=TRUE, warn.conflicts=FALSE) +library(splines, quietly=TRUE, warn.conflicts=FALSE) +library(edgeR, quietly=TRUE, warn.conflicts=FALSE) +library(limma, quietly=TRUE, warn.conflicts=FALSE) +library(scales, quietly=TRUE, warn.conflicts=FALSE) +library(getopt, quietly=TRUE, warn.conflicts=FALSE) + +################################################################################ +### Function Delcaration +################################################################################ +# Function to sanitise contrast equations so there are no whitespaces +# surrounding the arithmetic operators, leading or trailing whitespace +sanitiseEquation <- function(equation) { + equation <- gsub(" *[+] *", "+", equation) + equation <- gsub(" *[-] *", "-", equation) + equation <- gsub(" *[/] *", "/", equation) + equation <- gsub(" *[*] *", "*", equation) + equation <- gsub("^\\s+|\\s+$", "", equation) + return(equation) +} + +# Function to sanitise group information +sanitiseGroups <- function(string) { + string <- gsub(" *[,] *", ",", string) + string <- gsub("^\\s+|\\s+$", "", string) + return(string) +} + +# Function to change periods to whitespace in a string +unmake.names <- function(string) { + string <- gsub(".", " ", string, fixed=TRUE) + return(string) +} + +# Generate output folder and paths +makeOut <- function(filename) { + return(paste0(opt$outPath, "/", filename)) +} + +# Generating design information +pasteListName <- function(string) { + return(paste0("factors$", string)) +} + +# Create cata function: default path set, default seperator empty and appending +# true by default (Ripped straight from the cat function with altered argument +# defaults) +cata <- function(..., file=opt$htmlPath, sep="", fill=FALSE, labels=NULL, + append=TRUE) { + if (is.character(file)) + if (file == "") + file <- stdout() + else if (substring(file, 1L, 1L) == "|") { + file <- pipe(substring(file, 2L), "w") + on.exit(close(file)) + } + else { + file <- file(file, ifelse(append, "a", "w")) + on.exit(close(file)) + } + .Internal(cat(list(...), file, sep, fill, labels, append)) +} + +# Function to write code for html head and title +HtmlHead <- function(title) { + cata("\n") + cata("", title, "\n") + cata("\n") +} + +# Function to write code for html links +HtmlLink <- function(address, label=address) { + cata("", label, "
\n") +} + +# Function to write code for html images +HtmlImage <- function(source, label=source, height=600, width=600) { + cata("\"",\n") +} + +# Function to write code for html list items +ListItem <- function(...) { + cata("
  • ", ..., "
  • \n") +} + +TableItem <- function(...) { + cata("", ..., "\n") +} + +TableHeadItem <- function(...) { + cata("", ..., "\n") +} + +################################################################################ +### Input Processing +################################################################################ + +# Collect arguments from command line +args <- commandArgs(trailingOnly=TRUE) + +# Get options, using the spec as defined by the enclosed list. +# Read the options from the default: commandArgs(TRUE). +spec <- matrix(c( + "htmlPath", "R", 1, "character", + "outPath", "o", 1, "character", + "filesPath", "j", 2, "character", + "matrixPath", "m", 2, "character", + "factFile", "f", 2, "character", + "factInput", "i", 2, "character", + "annoPath", "a", 2, "character", + "contrastData", "C", 1, "character", + "cpmReq", "c", 1, "double", + "totReq", "y", 0, "logical", + "cntReq", "z", 1, "integer", + "sampleReq", "s", 1, "integer", + "normCounts", "x", 0, "logical", + "rdaOpt", "r", 0, "logical", + "lfcReq", "l", 1, "double", + "pValReq", "p", 1, "double", + "pAdjOpt", "d", 1, "character", + "normOpt", "n", 1, "character", + "robOpt", "b", 0, "logical", + "lrtOpt", "t", 0, "logical"), + byrow=TRUE, ncol=4) +opt <- getopt(spec) + + +if (is.null(opt$matrixPath) & is.null(opt$filesPath)) { + cat("A counts matrix (or a set of counts files) is required.\n") + q(status=1) +} + +if (is.null(opt$cpmReq)) { + filtCPM <- FALSE +} else { + filtCPM <- TRUE +} + +if (is.null(opt$cntReq) || is.null(opt$sampleReq)) { + filtSmpCount <- FALSE +} else { + filtSmpCount <- TRUE +} + +if (is.null(opt$totReq)) { + filtTotCount <- FALSE +} else { + filtTotCount <- TRUE +} + +if (is.null(opt$lrtOpt)) { + wantLRT <- FALSE +} else { + wantLRT <- TRUE +} + +if (is.null(opt$rdaOpt)) { + wantRda <- FALSE +} else { + wantRda <- TRUE +} + +if (is.null(opt$annoPath)) { + haveAnno <- FALSE +} else { + haveAnno <- TRUE +} + +if (is.null(opt$normCounts)) { + wantNorm <- FALSE +} else { + wantNorm <- TRUE +} + +if (is.null(opt$robOpt)) { + wantRobust <- FALSE +} else { + wantRobust <- TRUE +} + + +if (!is.null(opt$filesPath)) { + # Process the separate count files (adapted from DESeq2 wrapper) + library("rjson") + parser <- newJSONParser() + parser$addData(opt$filesPath) + factorList <- parser$getObject() + factors <- sapply(factorList, function(x) x[[1]]) + filenamesIn <- unname(unlist(factorList[[1]][[2]])) + sampleTable <- data.frame(sample=basename(filenamesIn), + filename=filenamesIn, + row.names=filenamesIn, + stringsAsFactors=FALSE) + for (factor in factorList) { + factorName <- factor[[1]] + sampleTable[[factorName]] <- character(nrow(sampleTable)) + lvls <- sapply(factor[[2]], function(x) names(x)) + for (i in seq_along(factor[[2]])) { + files <- factor[[2]][[i]][[1]] + sampleTable[files,factorName] <- lvls[i] + } + sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) + } + rownames(sampleTable) <- sampleTable$sample + rem <- c("sample","filename") + factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE] + + #read in count files and create single table + countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)}) + counts <- do.call("cbind", countfiles) + +} else { + # Process the single count matrix + counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) + row.names(counts) <- counts[, 1] + counts <- counts[ , -1] + countsRows <- nrow(counts) + + # Process factors + if (is.null(opt$factInput)) { + factorData <- read.table(opt$factFile, header=TRUE, sep="\t") + factors <- factorData[, -1, drop=FALSE] + } else { + factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) + factorData <- list() + for (fact in factors) { + newFact <- unlist(strsplit(fact, split="::")) + factorData <- rbind(factorData, newFact) + } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. + + # Set the row names to be the name of the factor and delete first row + row.names(factorData) <- factorData[, 1] + factorData <- factorData[, -1] + factorData <- sapply(factorData, sanitiseGroups) + factorData <- sapply(factorData, strsplit, split=",") + factorData <- sapply(factorData, make.names) + # Transform factor data into data frame of R factor objects + factors <- data.frame(factorData) + } +} + + # if annotation file provided +if (haveAnno) { + geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) +} + +#Create output directory +dir.create(opt$outPath, showWarnings=FALSE) + +# Split up contrasts separated by comma into a vector then sanitise +contrastData <- unlist(strsplit(opt$contrastData, split=",")) +contrastData <- sanitiseEquation(contrastData) +contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) + +bcvOutPdf <- makeOut("bcvplot.pdf") +bcvOutPng <- makeOut("bcvplot.png") +qlOutPdf <- makeOut("qlplot.pdf") +qlOutPng <- makeOut("qlplot.png") +mdsOutPdf <- makeOut("mdsplot.pdf") +mdsOutPng <- makeOut("mdsplot.png") +mdOutPdf <- character() # Initialise character vector +mdOutPng <- character() +topOut <- character() +for (i in 1:length(contrastData)) { + mdOutPdf[i] <- makeOut(paste0("mdplot_", contrastData[i], ".pdf")) + mdOutPng[i] <- makeOut(paste0("mdplot_", contrastData[i], ".png")) + topOut[i] <- makeOut(paste0("edgeR_", contrastData[i], ".tsv")) +} # Save output paths for each contrast as vectors +normOut <- makeOut("edgeR_normcounts.tsv") +rdaOut <- makeOut("edgeR_analysis.RData") +sessionOut <- makeOut("session_info.txt") + +# Initialise data for html links and images, data frame with columns Label and +# Link +linkData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE) +imageData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE) + +# Initialise vectors for storage of up/down/neutral regulated counts +upCount <- numeric() +downCount <- numeric() +flatCount <- numeric() + +################################################################################ +### Data Processing +################################################################################ + +# Extract counts and annotation data +data <- list() +data$counts <- counts +if (haveAnno) { + data$genes <- geneanno +} else { + data$genes <- data.frame(GeneID=row.names(counts)) +} + +# If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of +# samples. Default is no filtering +preFilterCount <- nrow(data$counts) + +if (filtCPM || filtSmpCount || filtTotCount) { + + if (filtTotCount) { + keep <- rowSums(data$counts) >= opt$cntReq + } else if (filtSmpCount) { + keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq + } else if (filtCPM) { + keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq + } + + data$counts <- data$counts[keep, ] + data$genes <- data$genes[keep, , drop=FALSE] +} + +postFilterCount <- nrow(data$counts) +filteredCount <- preFilterCount-postFilterCount + +# Creating naming data +samplenames <- colnames(data$counts) +sampleanno <- data.frame("sampleID"=samplenames, factors) + + +# Generating the DGEList object "data" +data$samples <- sampleanno +data$samples$lib.size <- colSums(data$counts) +data$samples$norm.factors <- 1 +row.names(data$samples) <- colnames(data$counts) +data <- new("DGEList", data) + +# Name rows of factors according to their sample +row.names(factors) <- names(data$counts) +factorList <- sapply(names(factors), pasteListName) + +formula <- "~0" +for (i in 1:length(factorList)) { + formula <- paste(formula, factorList[i], sep="+") +} + +formula <- formula(formula) +design <- model.matrix(formula) + +for (i in 1:length(factorList)) { + colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE) +} + +# Calculating normalising factor, estimating dispersion +data <- calcNormFactors(data, method=opt$normOpt) + +if (wantRobust) { + data <- estimateDisp(data, design=design, robust=TRUE) +} else { + data <- estimateDisp(data, design=design) +} + +# Generate contrasts information +contrasts <- makeContrasts(contrasts=contrastData, levels=design) + +################################################################################ +### Data Output +################################################################################ + +# Plot MDS +labels <- names(counts) +png(mdsOutPng, width=600, height=600) +# Currently only using a single factor +plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot") +imageData[1, ] <- c("MDS Plot", "mdsplot.png") +invisible(dev.off()) + +pdf(mdsOutPdf) +plotMDS(data, labels=labels, cex=0.5) +linkData[1, ] <- c("MDS Plot.pdf", "mdsplot.pdf") +invisible(dev.off()) + +# BCV Plot +png(bcvOutPng, width=600, height=600) +plotBCV(data, main="BCV Plot") +imgName <- "BCV Plot" +imgAddr <- "bcvplot.png" +imageData <- rbind(imageData, c(imgName, imgAddr)) +invisible(dev.off()) + +pdf(bcvOutPdf) +plotBCV(data, main="BCV Plot") +linkName <- paste0("BCV Plot.pdf") +linkAddr <- paste0("bcvplot.pdf") +linkData <- rbind(linkData, c(linkName, linkAddr)) +invisible(dev.off()) + +# Generate fit +if (wantLRT) { + + fit <- glmFit(data, design) + +} else { + + if (wantRobust) { + fit <- glmQLFit(data, design, robust=TRUE) + } else { + fit <- glmQLFit(data, design) + } + + # Plot QL dispersions + png(qlOutPng, width=600, height=600) + plotQLDisp(fit, main="QL Plot") + imgName <- "QL Plot" + imgAddr <- "qlplot.png" + imageData <- rbind(imageData, c(imgName, imgAddr)) + invisible(dev.off()) + + pdf(qlOutPdf) + plotQLDisp(fit, main="QL Plot") + linkName <- "QL Plot.pdf" + linkAddr <- "qlplot.pdf" + linkData <- rbind(linkData, c(linkName, linkAddr)) + invisible(dev.off()) +} + + # Save normalised counts (log2cpm) +if (wantNorm) { + normalisedCounts <- cpm(data, normalized.lib.sizes=TRUE, log=TRUE) + normalisedCounts <- data.frame(data$genes, normalisedCounts) + write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t") + linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv")) +} + + +for (i in 1:length(contrastData)) { + if (wantLRT) { + res <- glmLRT(fit, contrast=contrasts[, i]) + } else { + res <- glmQLFTest(fit, contrast=contrasts[, i]) + } + + status = decideTestsDGE(res, adjust.method=opt$pAdjOpt, p.value=opt$pValReq, + lfc=opt$lfcReq) + sumStatus <- summary(status) + + # Collect counts for differential expression + upCount[i] <- sumStatus["1", ] + downCount[i] <- sumStatus["-1", ] + flatCount[i] <- sumStatus["0", ] + + # Write top expressions table + top <- topTags(res, n=Inf, sort.by="PValue") + write.table(top, file=topOut[i], row.names=FALSE, sep="\t") + + linkName <- paste0("edgeR_", contrastData[i], ".tsv") + linkAddr <- paste0("edgeR_", contrastData[i], ".tsv") + linkData <- rbind(linkData, c(linkName, linkAddr)) + + # Plot MD (log ratios vs mean difference) using limma package + pdf(mdOutPdf[i]) + limma::plotMD(res, status=status, + main=paste("MD Plot:", unmake.names(contrastData[i])), + col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), + xlab="Average Expression", ylab="logFC") + + abline(h=0, col="grey", lty=2) + + linkName <- paste0("MD Plot_", contrastData[i], ".pdf") + linkAddr <- paste0("mdplot_", contrastData[i], ".pdf") + linkData <- rbind(linkData, c(linkName, linkAddr)) + invisible(dev.off()) + + png(mdOutPng[i], height=600, width=600) + limma::plotMD(res, status=status, + main=paste("MD Plot:", unmake.names(contrastData[i])), + col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), + xlab="Average Expression", ylab="logFC") + + abline(h=0, col="grey", lty=2) + + imgName <- paste0("MD Plot_", contrastData[i], ".png") + imgAddr <- paste0("mdplot_", contrastData[i], ".png") + imageData <- rbind(imageData, c(imgName, imgAddr)) + invisible(dev.off()) +} +sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) +row.names(sigDiff) <- contrastData + +# Save relevant items as rda object +if (wantRda) { + if (wantNorm) { + save(counts, data, status, normalisedCounts, labels, factors, fit, res, top, contrasts, design, + file=rdaOut, ascii=TRUE) + } else { + save(counts, data, status, labels, factors, fit, res, top, contrasts, design, + file=rdaOut, ascii=TRUE) + } + linkData <- rbind(linkData, c("edgeR_analysis.RData", "edgeR_analysis.RData")) +} + +# Record session info +writeLines(capture.output(sessionInfo()), sessionOut) +linkData <- rbind(linkData, c("Session Info", "session_info.txt")) + +# Record ending time and calculate total run time +timeEnd <- as.character(Sys.time()) +timeTaken <- capture.output(round(difftime(timeEnd, timeStart), digits=3)) +timeTaken <- gsub("Time difference of ", "", timeTaken, fixed=TRUE) + +################################################################################ +### HTML Generation +################################################################################ + +# Clear file +cat("", file=opt$htmlPath) + +cata("\n") + +cata("\n") +cata("

    edgeR Analysis Output:

    \n") +cata("Links to PDF copies of plots are in 'Plots' section below.
    \n") + +HtmlImage(imageData$Link[1], imageData$Label[1]) + +for (i in 2:nrow(imageData)) { + HtmlImage(imageData$Link[i], imageData$Label[i]) +} + +cata("

    Differential Expression Counts:

    \n") + +cata("\n") +cata("\n") +TableItem() +for (i in colnames(sigDiff)) { + TableHeadItem(i) +} +cata("\n") +for (i in 1:nrow(sigDiff)) { + cata("\n") + TableHeadItem(unmake.names(row.names(sigDiff)[i])) + for (j in 1:ncol(sigDiff)) { + TableItem(as.character(sigDiff[i, j])) + } + cata("\n") +} +cata("
    ") + +cata("

    Plots:

    \n") +for (i in 1:nrow(linkData)) { + if (grepl(".pdf", linkData$Link[i])) { + HtmlLink(linkData$Link[i], linkData$Label[i]) + } +} + +cata("

    Tables:

    \n") +for (i in 1:nrow(linkData)) { + if (grepl(".tsv", linkData$Link[i])) { + HtmlLink(linkData$Link[i], linkData$Label[i]) + } +} + +if (wantRda) { + cata("

    R Data Objects:

    \n") + for (i in 1:nrow(linkData)) { + if (grepl(".RData", linkData$Link[i])) { + HtmlLink(linkData$Link[i], linkData$Label[i]) + } + } +} + +cata("

    Alt-click links to download file.

    \n") +cata("

    Click floppy disc icon associated history item to download ") +cata("all files.

    \n") +cata("

    .tsv files can be viewed in Excel or any spreadsheet program.

    \n") + +cata("

    Additional Information

    \n") +cata("\n") + +cata("

    Summary of experimental data:

    \n") + +cata("

    *CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*

    \n") + +cata("\n") +cata("\n") +TableHeadItem("SampleID") +TableHeadItem(names(factors)[1], " (Primary Factor)") + + if (ncol(factors) > 1) { + for (i in names(factors)[2:length(names(factors))]) { + TableHeadItem(i) + } + cata("\n") + } + +for (i in 1:nrow(factors)) { + cata("\n") + TableHeadItem(row.names(factors)[i]) + for (j in 1:ncol(factors)) { + TableItem(as.character(unmake.names(factors[i, j]))) + } + cata("\n") +} +cata("
    ") + +for (i in 1:nrow(linkData)) { + if (grepl("session_info", linkData$Link[i])) { + HtmlLink(linkData$Link[i], linkData$Label[i]) + } +} + +cata("\n") +cata("\n") +TableItem("Task started at:"); TableItem(timeStart) +cata("\n") +cata("\n") +TableItem("Task ended at:"); TableItem(timeEnd) +cata("\n") +cata("\n") +TableItem("Task run time:"); TableItem(timeTaken) +cata("\n") +cata("
    \n") + +cata("\n") +cata("") diff -r 000000000000 -r 9bdff28ae1b1 edger.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/edger.xml Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,733 @@ + + + Perform differential expression of count data + + + + bioconductor-edger + r-rjson + r-getopt + + r-scales + + r-statmod + + + /dev/null | grep -v -i "WARNING: ")", scales version" $(R --vanilla --slave -e "library(scales); cat(sessionInfo()\$otherPkgs\$scales\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ") + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ^[\w]+$ + + + + ^[\w,]+$ + + + + + + + + + + + + + + + + + + + + + + + + ^[\w-]+$ + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + +
    + + + + +
    + + +
    + + + + + + + + + + + + + + + + +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1093/bioinformatics/btp616 + 10.1093/nar/gkv412 + +
    diff -r 000000000000 -r 9bdff28ae1b1 test-data/Mut1.counts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Mut1.counts Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +GeneID Mut1 +11287 1463 +11298 1345 +11302 5 +11303 1574 +11304 361 +11305 1762 diff -r 000000000000 -r 9bdff28ae1b1 test-data/Mut2.counts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Mut2.counts Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +GeneID Mut2 +11287 1441 +11298 1291 +11302 6 +11303 1519 +11304 397 +11305 1942 diff -r 000000000000 -r 9bdff28ae1b1 test-data/Mut3.counts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Mut3.counts Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +GeneID Mut3 +11287 1495 +11298 1346 +11302 5 +11303 1654 +11304 346 +11305 2027 diff -r 000000000000 -r 9bdff28ae1b1 test-data/WT1.counts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/WT1.counts Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +GeneID WT1 +11287 1699 +11298 1905 +11302 6 +11303 2099 +11304 356 +11305 2528 diff -r 000000000000 -r 9bdff28ae1b1 test-data/WT2.counts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/WT2.counts Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +GeneID WT2 +11287 1528 +11298 1744 +11302 8 +11303 1974 +11304 312 +11305 2438 diff -r 000000000000 -r 9bdff28ae1b1 test-data/WT3.counts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/WT3.counts Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +GeneID WT3 +11287 1601 +11298 1834 +11302 7 +11303 2100 +11304 337 +11305 2493 diff -r 000000000000 -r 9bdff28ae1b1 test-data/anno.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/anno.txt Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +EntrezID Symbol GeneName Chr Length +11287 Pzp pregnancy zone protein 6 4681 +11298 Aanat arylalkylamine N-acetyltransferase 11 1455 +11302 Aatk apoptosis-associated tyrosine kinase 11 5743 +11303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 4 10260 +11304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 3 7248 +11305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 2 8061 \ No newline at end of file diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_Mut-WT.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/edgeR_Mut-WT.tsv Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +"GeneID" "logFC" "logCPM" "F" "PValue" "FDR" +"11304" 0.458203001410391 15.530162861746 32.6285109553746 6.943370724917e-06 4.1660224349502e-05 +"11287" 0.188840644104212 17.6536729774735 20.5671667733158 0.000135453949597801 0.000406361848793403 +"11298" -0.138359578382475 17.6815280107154 10.8470695851279 0.00306012801564425 0.00612025603128849 +"11303" -0.0561156581317604 17.8897677663033 1.50815092591008 0.231329593888878 0.346994390833318 +"11305" -0.0579340818829784 18.1615839598046 1.09689306676368 0.305382540289637 0.366459048347564 +"11302" -0.0682406105165454 10.0898264751075 0.137130529665157 0.884266488139469 0.884266488139469 diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_Mut-WT_2fact.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/edgeR_Mut-WT_2fact.tsv Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +"GeneID" "logFC" "logCPM" "F" "PValue" "FDR" +"11287" 0.189281291475186 17.6499778192954 198.646314971919 7.90598427634257e-09 4.74359056580554e-08 +"11298" -0.13798041694802 17.6843133699537 96.2224552671758 4.15830411749776e-06 1.24749123524933e-05 +"11304" 0.458490715244216 15.526484673111 14.5864146735617 0.00244295799161999 0.00488591598323999 +"11303" -0.0560600217169691 17.8909334307093 6.5300693781724 0.0442859767053646 0.0664289650580469 +"11305" -0.0585095825423414 18.1629882429457 1.07140336604322 0.32103822810743 0.385245873728916 +"11302" -0.0716631320244627 10.0898336653124 0.376796260571098 0.878304702615846 0.878304702615846 diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_Mut-WT_2fact_anno.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/edgeR_Mut-WT_2fact_anno.tsv Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "logCPM" "F" "PValue" "FDR" +11287 "Pzp" "pregnancy zone protein" 6 4681 0.189281947498313 17.6499778192954 198.646315096405 7.90598424818912e-09 4.74359054891347e-08 +11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 -0.137980416947824 17.6843133699537 96.2224553233548 4.15830411749738e-06 1.24749123524921e-05 +11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 0.45849071524422 15.526484673111 14.5864146737822 0.00244295799149183 0.00488591598298366 +11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 -0.0560600215744048 17.8909334307093 6.53006938009001 0.0442859767053567 0.066428965058035 +11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 -0.0585095828508861 18.1629882429457 1.07140336564628 0.321038228193371 0.385245873832045 +11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 -0.0716631320197652 10.0898336653124 0.376796260576848 0.878304702615841 0.878304702615841 diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_Mut-WT_anno.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/edgeR_Mut-WT_anno.tsv Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "logCPM" "F" "PValue" "FDR" +11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 0.458203001410391 15.530162861746 32.6285109553746 6.943370724917e-06 4.1660224349502e-05 +11287 "Pzp" "pregnancy zone protein" 6 4681 0.188840644104212 17.6536729774735 20.5671667733158 0.000135453949597801 0.000406361848793403 +11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 -0.138359578382475 17.6815280107154 10.8470695851279 0.00306012801564425 0.00612025603128849 +11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 -0.0561156581317604 17.8897677663033 1.50815092591008 0.231329593888878 0.346994390833318 +11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 -0.0579340818829784 18.1615839598046 1.09689306676368 0.305382540289637 0.366459048347564 +11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 -0.0682406105165454 10.0898264751075 0.137130529665157 0.884266488139469 0.884266488139469 diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_Mut-WT_filt.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/edgeR_Mut-WT_filt.tsv Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,6 @@ +"GeneID" "logFC" "logCPM" "F" "PValue" "FDR" +"11287" 0.187201149217925 17.6526225386971 165.500659651998 5.18054239620105e-10 2.59027119810053e-09 +"11298" -0.140077523013286 17.6838446963123 82.0496288033128 2.92613742709898e-06 7.31534356774746e-06 +"11304" 0.456820345055957 15.5288695886958 25.2675517854784 6.46433259176098e-05 0.00010773887652935 +"11303" -0.0578468398229744 17.8912127135125 5.26103367901545 0.0384341523491632 0.048042690436454 +"11305" -0.0593023205976883 18.1634104549086 0.864302521617601 0.363623540536245 0.363623540536245 diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_WT-Mut.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/edgeR_WT-Mut.tsv Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +"GeneID" "logFC" "logCPM" "F" "PValue" "FDR" +"11304" -0.458203001410391 15.530162861746 32.6285109553746 6.943370724917e-06 4.1660224349502e-05 +"11287" -0.188840644104212 17.6536729774735 20.5671667733158 0.000135453949597801 0.000406361848793403 +"11298" 0.138359578382475 17.6815280107154 10.8470695851279 0.00306012801564425 0.00612025603128849 +"11303" 0.0561156581317604 17.8897677663033 1.50815092591008 0.231329593888878 0.346994390833318 +"11305" 0.0579340818829784 18.1615839598046 1.09689306676368 0.305382540289637 0.366459048347564 +"11302" 0.0682406105165454 10.0898264751075 0.137130529665157 0.884266488139469 0.884266488139469 diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_WT-Mut_2fact_anno.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/edgeR_WT-Mut_2fact_anno.tsv Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +"EntrezID" "Symbol" "GeneName" "Chr" "Length" "logFC" "logCPM" "F" "PValue" "FDR" +11287 "Pzp" "pregnancy zone protein" 6 4681 -0.189281947498313 17.6499778192954 198.646315096405 7.90598424818912e-09 4.74359054891347e-08 +11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 0.137980416947824 17.6843133699537 96.2224553233548 4.15830411749738e-06 1.24749123524921e-05 +11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 -0.45849071524422 15.526484673111 14.5864146737822 0.00244295799149183 0.00488591598298366 +11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 0.0560600215744048 17.8909334307093 6.53006938009001 0.0442859767053567 0.066428965058035 +11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 0.0585095828508861 18.1629882429457 1.07140336564628 0.321038228193371 0.385245873832045 +11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 0.0716631320197652 10.0898336653124 0.376796260576848 0.878304702615841 0.878304702615841 diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_normcounts.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/edgeR_normcounts.tsv Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +"GeneID" "Mut1" "Mut2" "Mut3" "WT1" "WT2" "WT3" +"11287" 17.7717801382127 17.7103668584544 17.7656984572699 17.6075444214943 17.5078565133576 17.5637960881114 +"11298" 17.6504754185442 17.55181161064 17.6142553019077 17.7726234935868 17.6985800110028 17.7597848438911 +"11302" 9.64041099082467 9.8551982993804 9.60469198931215 9.52851478148979 9.97869946791847 9.78190633986473 +"11303" 17.8772707356813 17.7864068634935 17.9114914356477 17.9125147871338 17.8772755854201 17.9551530504837 +"11304" 15.753577788623 15.8510977521242 15.6551142861549 15.3537170121875 15.2168364952853 15.3165751633072 +"11305" 18.0400277799982 18.1407817993511 18.2048423497925 18.1807759635442 18.1818136580236 18.2026167343562 diff -r 000000000000 -r 9bdff28ae1b1 test-data/edgeR_normcounts_anno.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/edgeR_normcounts_anno.tsv Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +"EntrezID" "Symbol" "GeneName" "Chr" "Length" "Mut1" "Mut2" "Mut3" "WT1" "WT2" "WT3" +11287 "Pzp" "pregnancy zone protein" 6 4681 17.7717801382127 17.7103668584544 17.7656984572699 17.6075444214943 17.5078565133576 17.5637960881114 +11298 "Aanat" "arylalkylamine N-acetyltransferase" 11 1455 17.6504754185442 17.55181161064 17.6142553019077 17.7726234935868 17.6985800110028 17.7597848438911 +11302 "Aatk" "apoptosis-associated tyrosine kinase" 11 5743 9.64041099082467 9.8551982993804 9.60469198931215 9.52851478148979 9.97869946791847 9.78190633986473 +11303 "Abca1" "ATP-binding cassette, sub-family A (ABC1), member 1" 4 10260 17.8772707356813 17.7864068634935 17.9114914356477 17.9125147871338 17.8772755854201 17.9551530504837 +11304 "Abca4" "ATP-binding cassette, sub-family A (ABC1), member 4" 3 7248 15.753577788623 15.8510977521242 15.6551142861549 15.3537170121875 15.2168364952853 15.3165751633072 +11305 "Abca2" "ATP-binding cassette, sub-family A (ABC1), member 2" 2 8061 18.0400277799982 18.1407817993511 18.2048423497925 18.1807759635442 18.1818136580236 18.2026167343562 diff -r 000000000000 -r 9bdff28ae1b1 test-data/factorinfo.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/factorinfo.txt Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +Sample Genotype Batch +Mut1 Mut b1 +Mut2 Mut b2 +Mut3 Mut b3 +WT1 WT b1 +WT2 WT b2 +WT3 WT b3 diff -r 000000000000 -r 9bdff28ae1b1 test-data/matrix.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/matrix.txt Tue Nov 07 08:18:14 2017 -0500 @@ -0,0 +1,7 @@ +GeneID Mut1 Mut2 Mut3 WT1 WT2 WT3 +11287 1463 1441 1495 1699 1528 1601 +11298 1345 1291 1346 1905 1744 1834 +11302 5 6 5 6 8 7 +11303 1574 1519 1654 2099 1974 2100 +11304 361 397 346 356 312 337 +11305 1762 1942 2027 2528 2438 2493