comparison deseq2.R @ 30:8fe98f7094de draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 6868b66f73ddbe947986d1a20b546873cbd515a9
author iuc
date Fri, 26 Aug 2022 11:16:15 +0000
parents cd9874cb9019
children 9a882d108833
comparison
equal deleted inserted replaced
29:cd9874cb9019 30:8fe98f7094de
29 # 1 "parametric" 29 # 1 "parametric"
30 # 2 "local" 30 # 2 "local"
31 # 3 "mean" 31 # 3 "mean"
32 32
33 # setup R error handling to go to stderr 33 # setup R error handling to go to stderr
34 options(show.error.messages = F, error = function() { 34 options(show.error.messages = FALSE, error = function() {
35 cat(geterrmessage(), file = stderr()); q("no", 1, F) 35 cat(geterrmessage(), file = stderr())
36 q("no", 1, FALSE)
36 }) 37 })
37 38
38 # we need that to not crash galaxy with an UTF8 error on German LC settings. 39 # we need that to not crash galaxy with an UTF8 error on German LC settings.
39 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 40 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
40 41
67 "many_contrasts", "m", 0, "logical", 68 "many_contrasts", "m", 0, "logical",
68 "outlier_replace_off", "a", 0, "logical", 69 "outlier_replace_off", "a", 0, "logical",
69 "outlier_filter_off", "b", 0, "logical", 70 "outlier_filter_off", "b", 0, "logical",
70 "auto_mean_filter_off", "c", 0, "logical", 71 "auto_mean_filter_off", "c", 0, "logical",
71 "beta_prior_off", "d", 0, "logical", 72 "beta_prior_off", "d", 0, "logical",
72 "alpha_ma", "A", 1, "numeric" 73 "alpha_ma", "A", 1, "numeric",
74 "prefilter", "P", 0, "logical",
75 "prefilter_value", "V", 1, "numeric"
73 ), byrow = TRUE, ncol = 4) 76 ), byrow = TRUE, ncol = 4)
74 opt <- getopt(spec) 77 opt <- getopt(spec)
75 78
76 # if help was asked for print a friendly message 79 # if help was asked for print a friendly message
77 # and exit with a non-zero error code 80 # and exit with a non-zero error code
237 ## If we have no other information, estimate from raw. 240 ## If we have no other information, estimate from raw.
238 cat("\nsize factors for samples: no tximport data, no normalization factors, estimating from raw data\n") 241 cat("\nsize factors for samples: no tximport data, no normalization factors, estimating from raw data\n")
239 size_factors <- estimateSizeFactorsForMatrix(counts(dds)) 242 size_factors <- estimateSizeFactorsForMatrix(counts(dds))
240 } 243 }
241 } 244 }
242 write.table(size_factors, file = opt$sizefactorsfile, sep = "\t", col.names = F, quote = FALSE) 245 write.table(size_factors, file = opt$sizefactorsfile, sep = "\t", col.names = FALSE, quote = FALSE)
243 } 246 }
244 247
245 apply_batch_factors <- function(dds, batch_factors) { 248 apply_batch_factors <- function(dds, batch_factors) {
246 rownames(batch_factors) <- batch_factors$identifier 249 rownames(batch_factors) <- batch_factors$identifier
247 batch_factors <- subset(batch_factors, select = -c(identifier, condition)) 250 batch_factors <- subset(batch_factors, select = -c(identifier, condition))
251 stop("Batch factor names don't correspond to input sample names, check input files") 254 stop("Batch factor names don't correspond to input sample names, check input files")
252 } 255 }
253 dds_data <- colData(dds) 256 dds_data <- colData(dds)
254 # Merge dds_data with batch_factors using indexes, which are sample names 257 # Merge dds_data with batch_factors using indexes, which are sample names
255 # Set sort to False, which maintains the order in dds_data 258 # Set sort to False, which maintains the order in dds_data
256 reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort = F) 259 reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort = FALSE)
257 batch_factors <- reordered_batch[, ncol(dds_data):ncol(reordered_batch)] 260 batch_factors <- reordered_batch[, ncol(dds_data):ncol(reordered_batch)]
258 for (factor in colnames(batch_factors)) { 261 for (factor in colnames(batch_factors)) {
259 dds[[factor]] <- batch_factors[[factor]] 262 dds[[factor]] <- batch_factors[[factor]]
260 } 263 }
261 colnames(dds) <- reordered_batch[, 1] 264 colnames(dds) <- reordered_batch[, 1]
262 return(dds) 265 return(dds)
263 } 266 }
264 267
265 if (!is.null(opt$batch_factors)) { 268 if (!is.null(opt$batch_factors)) {
266 batch_factors <- read.table(opt$batch_factors, sep = "\t", header = T) 269 batch_factors <- read.table(opt$batch_factors, sep = "\t", header = TRUE)
267 dds <- apply_batch_factors(dds = dds, batch_factors = batch_factors) 270 dds <- apply_batch_factors(dds = dds, batch_factors = batch_factors)
268 batch_design <- colnames(batch_factors)[-c(1, 2)] 271 batch_design <- colnames(batch_factors)[-c(1, 2)]
269 design_formula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse = " + "))) 272 design_formula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse = " + ")))
270 design(dds) <- design_formula 273 design(dds) <- design_formula
271 } 274 }
276 print(sample_table[, -c(1:2), drop = FALSE]) 279 print(sample_table[, -c(1:2), drop = FALSE])
277 cat("\ndesign formula:\n") 280 cat("\ndesign formula:\n")
278 print(design_formula) 281 print(design_formula)
279 cat("\n\n") 282 cat("\n\n")
280 cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) 283 cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n"))
284 }
285
286 # minimal pre-filtering
287 if (!is.null(opt$prefilter)) {
288 keep <- rowSums(counts(dds)) >= opt$prefilter_value
289 dds <- dds[keep, ]
281 } 290 }
282 291
283 # optional outlier behavior 292 # optional outlier behavior
284 if (is.null(opt$outlier_replace_off)) { 293 if (is.null(opt$outlier_replace_off)) {
285 min_rep <- 7 294 min_rep <- 7