comparison deseq2.R @ 17:d9e5cadc7f0b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
author iuc
date Wed, 05 Sep 2018 15:54:03 -0400
parents a416957ee305
children 3bf1b3ec1ddf
comparison
equal deleted inserted replaced
16:a416957ee305 17:d9e5cadc7f0b
44 # get options, using the spec as defined by the enclosed list. 44 # get options, using the spec as defined by the enclosed list.
45 # we read the options from the default: commandArgs(TRUE). 45 # we read the options from the default: commandArgs(TRUE).
46 spec <- matrix(c( 46 spec <- matrix(c(
47 "quiet", "q", 0, "logical", 47 "quiet", "q", 0, "logical",
48 "help", "h", 0, "logical", 48 "help", "h", 0, "logical",
49 "batch_factors", "", 1, "character",
49 "outfile", "o", 1, "character", 50 "outfile", "o", 1, "character",
50 "countsfile", "n", 1, "character", 51 "countsfile", "n", 1, "character",
51 "header", "H", 0, "logical", 52 "header", "H", 0, "logical",
52 "factors", "f", 1, "character", 53 "factors", "f", 1, "character",
53 "files_to_labels", "l", 1, "character", 54 "files_to_labels", "l", 1, "character",
85 TRUE 86 TRUE
86 } else { 87 } else {
87 FALSE 88 FALSE
88 } 89 }
89 90
90 if (!is.null(opt$header)) { 91 source_local <- function(fname){
91 hasHeader <- TRUE 92 argv <- commandArgs(trailingOnly = FALSE)
92 } else { 93 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
93 hasHeader <- FALSE 94 source(paste(base_dir, fname, sep="/"))
94 } 95 }
95 96
96 if (!is.null(opt$tximport)) { 97 source_local('get_deseq_dataset.R')
97 if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport")
98 if (tolower(file_ext(opt$tx2gene)) == "gtf") {
99 gtfFile <- opt$tx2gene
100 } else {
101 gtfFile <- NULL
102 tx2gene <- read.table(opt$tx2gene, header=FALSE)
103 }
104 useTXI <- TRUE
105 } else {
106 useTXI <- FALSE
107 }
108 98
109 suppressPackageStartupMessages({ 99 suppressPackageStartupMessages({
110 library("DESeq2") 100 library("DESeq2")
111 library("RColorBrewer") 101 library("RColorBrewer")
112 library("gplots") 102 library("gplots")
195 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n")) 185 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n"))
196 } 186 }
197 cat("\n---------------------\n") 187 cat("\n---------------------\n")
198 } 188 }
199 189
200 # For JSON input from Galaxy, path is absolute 190 dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula=designFormula, tximport=opt$tximport, txtype=opt$txtype, tx2gene=opt$tx2gene)
201 dir <- "" 191
202 192 apply_batch_factors <- function (dds, batch_factors) {
203 if (!useTXI & hasHeader) { 193 rownames(batch_factors) <- batch_factors$identifier
204 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) 194 batch_factors <- subset(batch_factors, select = -c(identifier, condition))
205 tbl <- do.call("cbind", countfiles) 195 dds_samples <- colnames(dds)
206 colnames(tbl) <- rownames(sampleTable) # take sample ids from header 196 batch_samples <- rownames(batch_factors)
207 197 if (!setequal(batch_samples, dds_samples)) {
208 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) 198 stop("Batch factor names don't correspond to input sample names, check input files")
209 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", 199 }
210 "not_aligned", "alignment_not_unique") 200 dds_data <- colData(dds)
211 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames 201 # Merge dds_data with batch_factors using indexes, which are sample names
212 tbl <- tbl[!specialRows, , drop = FALSE] 202 # Set sort to False, which maintains the order in dds_data
213 203 reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort=F)
214 dds <- DESeqDataSetFromMatrix(countData = tbl, 204 batch_factors <- reordered_batch[, ncol(dds_data):ncol(reordered_batch)]
215 colData = sampleTable[,-c(1:2), drop=FALSE], 205 for (factor in colnames(batch_factors)) {
216 design = designFormula) 206 dds[[factor]] <- batch_factors[[factor]]
217 } else if (!useTXI & !hasHeader) { 207 }
218 208 colnames(dds) <- reordered_batch[,1]
219 # construct the object from HTSeq files 209 return(dds)
220 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 210 }
221 directory = dir, 211
222 design = designFormula) 212 if (!is.null(opt$batch_factors)) {
223 labs <- unname(unlist(filenames_to_labels[colnames(dds)])) 213 batch_factors <- read.table(opt$batch_factors, sep="\t", header=T)
224 colnames(dds) <- labs 214 dds <- apply_batch_factors(dds = dds, batch_factors = batch_factors)
225 215 batch_design <- colnames(batch_factors)[-c(1,2)]
226 } else { 216 designFormula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse=" + ")))
227 # construct the object using tximport 217 design(dds) <- designFormula
228 # first need to make the tx2gene table
229 # this takes ~2-3 minutes using Bioconductor functions
230 if (!is.null(gtfFile)) {
231 suppressPackageStartupMessages({
232 library("GenomicFeatures")
233 })
234 txdb <- makeTxDbFromGFF(gtfFile, format="gtf")
235 k <- keys(txdb, keytype = "GENEID")
236 df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME")
237 tx2gene <- df[, 2:1] # tx ID, then gene ID
238 }
239 library("tximport")
240 txiFiles <- as.character(sampleTable[,2])
241 labs <- unname(unlist(filenames_to_labels[sampleTable[,1]]))
242 names(txiFiles) <- labs
243 txi <- tximport(txiFiles, type=opt$txtype, tx2gene=tx2gene)
244 dds <- DESeqDataSetFromTximport(txi,
245 sampleTable[,3:ncol(sampleTable),drop=FALSE],
246 designFormula)
247 } 218 }
248 219
249 if (verbose) { 220 if (verbose) {
250 cat("DESeq2 run information\n\n") 221 cat("DESeq2 run information\n\n")
251 cat("sample table:\n") 222 cat("sample table:\n")