# HG changeset patch
# User iuc
# Date 1606329415 0
# Node ID de44f8eff84ab6f2d1b8e0472455f74056e5a63b
# Parent 71bacea10eee0b3dfecd9ce84448ce9a6f4524c8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
diff -r 71bacea10eee -r de44f8eff84a deseq2.R
--- a/deseq2.R Sat Feb 29 17:08:08 2020 -0500
+++ b/deseq2.R Wed Nov 25 18:36:55 2020 +0000
@@ -31,7 +31,9 @@
# 3 "mean"
# setup R error handling to go to stderr
-options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+options(show.error.messages = F, error = function() {
+ cat(geterrmessage(), file = stderr()); q("no", 1, F)
+})
# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
@@ -55,50 +57,46 @@
"header", "H", 0, "logical",
"factors", "f", 1, "character",
"files_to_labels", "l", 1, "character",
- "plots" , "p", 1, "character",
+ "plots", "p", 1, "character",
"tximport", "i", 0, "logical",
"txtype", "y", 1, "character",
"tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF/GFF3 file
"esf", "e", 1, "character",
"fit_type", "t", 1, "integer",
"many_contrasts", "m", 0, "logical",
- "outlier_replace_off" , "a", 0, "logical",
- "outlier_filter_off" , "b", 0, "logical",
+ "outlier_replace_off", "a", 0, "logical",
+ "outlier_filter_off", "b", 0, "logical",
"auto_mean_filter_off", "c", 0, "logical",
- "beta_prior_off", "d", 0, "logical"),
- byrow=TRUE, ncol=4)
+ "beta_prior_off", "d", 0, "logical"
+), byrow = TRUE, ncol = 4)
opt <- getopt(spec)
# if help was asked for print a friendly message
# and exit with a non-zero error code
if (!is.null(opt$help)) {
- cat(getopt(spec, usage=TRUE))
- q(status=1)
+ cat(getopt(spec, usage = TRUE))
+ q(status = 1)
}
# enforce the following required arguments
if (is.null(opt$outfile)) {
cat("'outfile' is required\n")
- q(status=1)
+ q(status = 1)
}
if (is.null(opt$factors)) {
cat("'factors' is required\n")
- q(status=1)
+ q(status = 1)
}
-verbose <- if (is.null(opt$quiet)) {
- TRUE
-} else {
- FALSE
+verbose <- is.null(opt$quiet)
+
+source_local <- function(fname) {
+ argv <- commandArgs(trailingOnly = FALSE)
+ base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
+ source(paste(base_dir, fname, sep = "/"))
}
-source_local <- function(fname){
- argv <- commandArgs(trailingOnly = FALSE)
- base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
- source(paste(base_dir, fname, sep="/"))
-}
-
-source_local('get_deseq_dataset.R')
+source_local("get_deseq_dataset.R")
suppressPackageStartupMessages({
library("DESeq2")
@@ -109,102 +107,116 @@
if (opt$cores > 1) {
library("BiocParallel")
register(MulticoreParam(opt$cores))
- parallel = TRUE
+ parallel <- TRUE
} else {
- parallel = FALSE
+ parallel <- FALSE
}
# build or read sample table
-trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+trim <- function(x) gsub("^\\s+|\\s+$", "", x)
# switch on if 'factors' was provided:
library("rjson")
parser <- newJSONParser()
parser$addData(opt$factors)
-factorList <- parser$getObject()
+factor_list <- parser$getObject()
filenames_to_labels <- fromJSON(opt$files_to_labels)
-factors <- sapply(factorList, function(x) x[[1]])
-primaryFactor <- factors[1]
-filenamesIn <- unname(unlist(factorList[[1]][[2]]))
-labs = unname(unlist(filenames_to_labels[basename(filenamesIn)]))
-sampleTable <- data.frame(sample=basename(filenamesIn),
- filename=filenamesIn,
- row.names=filenamesIn,
- stringsAsFactors=FALSE)
-for (factor in factorList) {
- factorName <- trim(factor[[1]])
- sampleTable[[factorName]] <- character(nrow(sampleTable))
+factors <- sapply(factor_list, function(x) x[[1]])
+primary_factor <- factors[1]
+filenames_in <- unname(unlist(factor_list[[1]][[2]]))
+labs <- unname(unlist(filenames_to_labels[basename(filenames_in)]))
+sample_table <- data.frame(
+ sample = basename(filenames_in),
+ filename = filenames_in,
+ row.names = filenames_in,
+ stringsAsFactors = FALSE
+)
+for (factor in factor_list) {
+ factor_name <- trim(factor[[1]])
+ sample_table[[factor_name]] <- character(nrow(sample_table))
lvls <- sapply(factor[[2]], function(x) names(x))
for (i in seq_along(factor[[2]])) {
files <- factor[[2]][[i]][[1]]
- sampleTable[files,factorName] <- trim(lvls[i])
+ sample_table[files, factor_name] <- trim(lvls[i])
}
- sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
+ sample_table[[factor_name]] <- factor(sample_table[[factor_name]], levels = lvls)
}
-rownames(sampleTable) <- labs
+rownames(sample_table) <- labs
-primaryFactor <- factors[1]
-designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + ")))
+design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + ")))
# these are plots which are made once for each analysis
-generateGenericPlots <- function(dds, factors) {
+generate_generic_plots <- function(dds, factors) {
library("ggplot2")
library("ggrepel")
library("pheatmap")
rld <- rlog(dds)
- p <- plotPCA(rld, intgroup=rev(factors))
- print(p + geom_text_repel(aes_string(x = "PC1", y = "PC2", label = factor(colnames(dds))), size=3) + geom_point())
+ p <- plotPCA(rld, intgroup = rev(factors))
+ print(p + geom_text_repel(aes_string(x = "PC1", y = "PC2", label = factor(colnames(dds))), size = 3) + geom_point())
dat <- assay(rld)
- distsRL <- dist(t(dat))
- mat <- as.matrix(distsRL)
- colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
- pheatmap(mat,
- clustering_distance_rows=distsRL,
- clustering_distance_cols=distsRL,
- col=colors,
- main="Sample-to-sample distances")
- plotDispEsts(dds, main="Dispersion estimates")
+ dists_rl <- dist(t(dat))
+ mat <- as.matrix(dists_rl)
+ colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)
+ pheatmap(
+ mat,
+ clustering_distance_rows = dists_rl,
+ clustering_distance_cols = dists_rl,
+ col = colors,
+ main = "Sample-to-sample distances"
+ )
+ plotDispEsts(dds, main = "Dispersion estimates")
}
# these are plots which can be made for each comparison, e.g.
# once for C vs A and once for B vs A
-generateSpecificPlots <- function(res, threshold, title_suffix) {
+generate_specific_plots <- function(res, threshold, title_suffix) {
use <- res$baseMean > threshold
if (sum(!use) == 0) {
- h <- hist(res$pvalue, breaks=0:50/50, plot=FALSE)
- barplot(height = h$counts,
- col = "powderblue", space = 0, xlab="p-values", ylab="frequency",
- main=paste("Histogram of p-values for",title_suffix))
- text(x = c(0, length(h$counts)), y = 0, label=paste(c(0,1)), adj=c(0.5,1.7), xpd=NA)
+ h <- hist(res$pvalue, breaks = 0:50 / 50, plot = FALSE)
+ barplot(
+ height = h$counts,
+ col = "powderblue",
+ space = 0,
+ xlab = "p-values",
+ ylab = "frequency",
+ main = paste("Histogram of p-values for", title_suffix)
+ )
+ text(x = c(0, length(h$counts)), y = 0, label = paste(c(0, 1)), adj = c(0.5, 1.7), xpd = NA)
} else {
- h1 <- hist(res$pvalue[!use], breaks=0:50/50, plot=FALSE)
- h2 <- hist(res$pvalue[use], breaks=0:50/50, plot=FALSE)
- colori <- c("filtered (low count)"="khaki", "not filtered"="powderblue")
- barplot(height = rbind(h1$counts, h2$counts), beside = FALSE,
- col = colori, space = 0, xlab="p-values", ylab="frequency",
- main=paste("Histogram of p-values for",title_suffix))
- text(x = c(0, length(h1$counts)), y = 0, label=paste(c(0,1)), adj=c(0.5,1.7), xpd=NA)
- legend("topright", fill=rev(colori), legend=rev(names(colori)), bg="white")
+ h1 <- hist(res$pvalue[!use], breaks = 0:50 / 50, plot = FALSE)
+ h2 <- hist(res$pvalue[use], breaks = 0:50 / 50, plot = FALSE)
+ colori <- c("filtered (low count)" = "khaki", "not filtered" = "powderblue")
+ barplot(
+ height = rbind(h1$counts, h2$counts),
+ beside = FALSE,
+ col = colori,
+ space = 0,
+ xlab = "p-values",
+ ylab = "frequency",
+ main = paste("Histogram of p-values for", title_suffix)
+ )
+ text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0, 1)), adj = c(0.5, 1.7), xpd = NA)
+ legend("topright", fill = rev(colori), legend = rev(names(colori)), bg = "white")
}
- plotMA(res, main= paste("MA-plot for",title_suffix), ylim=range(res$log2FoldChange, na.rm=TRUE))
+ plotMA(res, main = paste("MA-plot for", title_suffix), ylim = range(res$log2FoldChange, na.rm = TRUE))
}
if (verbose) {
- cat(paste("primary factor:",primaryFactor,"\n"))
+ cat(paste("primary factor:", primary_factor, "\n"))
if (length(factors) > 1) {
- cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n"))
+ cat(paste("other factors in design:", paste(factors[-length(factors)], collapse = ","), "\n"))
}
cat("\n---------------------\n")
}
-dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula=designFormula, tximport=opt$tximport, txtype=opt$txtype, tx2gene=opt$tx2gene)
+dds <- get_deseq_dataset(sample_table, header = opt$header, design_formula = design_formula, tximport = opt$tximport, txtype = opt$txtype, tx2gene = opt$tx2gene)
# estimate size factors for the chosen method
-if(!is.null(opt$esf)){
- dds <- estimateSizeFactors(dds, type=opt$esf)
+if (!is.null(opt$esf)) {
+ dds <- estimateSizeFactors(dds, type = opt$esf)
}
-apply_batch_factors <- function (dds, batch_factors) {
+apply_batch_factors <- function(dds, batch_factors) {
rownames(batch_factors) <- batch_factors$identifier
batch_factors <- subset(batch_factors, select = -c(identifier, condition))
dds_samples <- colnames(dds)
@@ -215,155 +227,161 @@
dds_data <- colData(dds)
# Merge dds_data with batch_factors using indexes, which are sample names
# Set sort to False, which maintains the order in dds_data
- reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort=F)
+ reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort = F)
batch_factors <- reordered_batch[, ncol(dds_data):ncol(reordered_batch)]
for (factor in colnames(batch_factors)) {
dds[[factor]] <- batch_factors[[factor]]
}
- colnames(dds) <- reordered_batch[,1]
+ colnames(dds) <- reordered_batch[, 1]
return(dds)
}
if (!is.null(opt$batch_factors)) {
- batch_factors <- read.table(opt$batch_factors, sep="\t", header=T)
+ batch_factors <- read.table(opt$batch_factors, sep = "\t", header = T)
dds <- apply_batch_factors(dds = dds, batch_factors = batch_factors)
- batch_design <- colnames(batch_factors)[-c(1,2)]
- designFormula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse=" + ")))
- design(dds) <- designFormula
+ batch_design <- colnames(batch_factors)[-c(1, 2)]
+ design_formula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse = " + ")))
+ design(dds) <- design_formula
}
if (verbose) {
cat("DESeq2 run information\n\n")
cat("sample table:\n")
- print(sampleTable[,-c(1:2),drop=FALSE])
+ print(sample_table[, -c(1:2), drop = FALSE])
cat("\ndesign formula:\n")
- print(designFormula)
+ print(design_formula)
cat("\n\n")
cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n"))
}
# optional outlier behavior
if (is.null(opt$outlier_replace_off)) {
- minRep <- 7
+ min_rep <- 7
} else {
- minRep <- Inf
+ min_rep <- Inf
if (verbose) cat("outlier replacement off\n")
}
if (is.null(opt$outlier_filter_off)) {
- cooksCutoff <- TRUE
+ cooks_cutoff <- TRUE
} else {
- cooksCutoff <- FALSE
+ cooks_cutoff <- FALSE
if (verbose) cat("outlier filtering off\n")
}
# optional automatic mean filtering
if (is.null(opt$auto_mean_filter_off)) {
- independentFiltering <- TRUE
+ independent_filtering <- TRUE
} else {
- independentFiltering <- FALSE
+ independent_filtering <- FALSE
if (verbose) cat("automatic filtering on the mean off\n")
}
# shrinkage of LFCs
if (is.null(opt$beta_prior_off)) {
- betaPrior <- TRUE
+ beta_prior <- TRUE
} else {
- betaPrior <- FALSE
+ beta_prior <- FALSE
if (verbose) cat("beta prior off\n")
}
# dispersion fit type
if (is.null(opt$fit_type)) {
- fitType <- "parametric"
+ fit_type <- "parametric"
} else {
- fitType <- c("parametric","local","mean")[opt$fit_type]
+ fit_type <- c("parametric", "local", "mean")[opt$fit_type]
}
-if (verbose) cat(paste("using disperion fit type:",fitType,"\n"))
+if (verbose) cat(paste("using disperion fit type:", fit_type, "\n"))
# run the analysis
-dds <- DESeq(dds, fitType=fitType, betaPrior=betaPrior, minReplicatesForReplace=minRep, parallel=parallel)
+dds <- DESeq(dds, fitType = fit_type, betaPrior = beta_prior, minReplicatesForReplace = min_rep, parallel = parallel)
# create the generic plots and leave the device open
if (!is.null(opt$plots)) {
if (verbose) cat("creating plots\n")
pdf(opt$plots)
- generateGenericPlots(dds, factors)
+ generate_generic_plots(dds, factors)
}
-n <- nlevels(colData(dds)[[primaryFactor]])
-allLevels <- levels(colData(dds)[[primaryFactor]])
+n <- nlevels(colData(dds)[[primary_factor]])
+all_levels <- levels(colData(dds)[[primary_factor]])
if (!is.null(opt$countsfile)) {
- normalizedCounts<-counts(dds,normalized=TRUE)
- write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE)
+ normalized_counts <- counts(dds, normalized = TRUE)
+ write.table(normalized_counts, file = opt$countsfile, sep = "\t", col.names = NA, quote = FALSE)
}
if (!is.null(opt$rlogfile)) {
- rLogNormalized <-rlogTransformation(dds)
- rLogNormalizedMat <- assay(rLogNormalized)
- write.table(rLogNormalizedMat, file=opt$rlogfile, sep="\t", col.names=NA, quote=FALSE)
+ rlog_normalized <- rlogTransformation(dds)
+ rlog_normalized_mat <- assay(rlog_normalized)
+ write.table(rlog_normalized_mat, file = opt$rlogfile, sep = "\t", col.names = NA, quote = FALSE)
}
if (!is.null(opt$vstfile)) {
- vstNormalized<-varianceStabilizingTransformation(dds)
- vstNormalizedMat <- assay(vstNormalized)
- write.table(vstNormalizedMat, file=opt$vstfile, sep="\t", col.names=NA, quote=FALSE)
+ vst_normalized <- varianceStabilizingTransformation(dds)
+ vst_normalized_mat <- assay(vst_normalized)
+ write.table(vst_normalized_mat, file = opt$vstfile, sep = "\t", col.names = NA, quote = FALSE)
}
if (is.null(opt$many_contrasts)) {
# only contrast the first and second level of the primary factor
- ref <- allLevels[1]
- lvl <- allLevels[2]
- res <- results(dds, contrast=c(primaryFactor, lvl, ref),
- cooksCutoff=cooksCutoff,
- independentFiltering=independentFiltering)
+ ref <- all_levels[1]
+ lvl <- all_levels[2]
+ res <- results(
+ dds,
+ contrast = c(primary_factor, lvl, ref),
+ cooksCutoff = cooks_cutoff,
+ independentFiltering = independent_filtering
+ )
if (verbose) {
cat("summary of results\n")
- cat(paste0(primaryFactor,": ",lvl," vs ",ref,"\n"))
+ cat(paste0(primary_factor, ": ", lvl, " vs ", ref, "\n"))
print(summary(res))
}
- resSorted <- res[order(res$padj),]
- outDF <- as.data.frame(resSorted)
- outDF$geneID <- rownames(outDF)
- outDF <- outDF[,c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
+ res_sorted <- res[order(res$padj), ]
+ out_df <- as.data.frame(res_sorted)
+ out_df$geneID <- rownames(out_df) # nolint
+ out_df <- out_df[, c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
filename <- opt$outfile
- write.table(outDF, file=filename, sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
- if (independentFiltering) {
+ write.table(out_df, file = filename, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
+ if (independent_filtering) {
threshold <- unname(attr(res, "filterThreshold"))
} else {
threshold <- 0
}
- title_suffix <- paste0(primaryFactor,": ",lvl," vs ",ref)
+ title_suffix <- paste0(primary_factor, ": ", lvl, " vs ", ref)
if (!is.null(opt$plots)) {
- generateSpecificPlots(res, threshold, title_suffix)
+ generate_specific_plots(res, threshold, title_suffix)
}
} else {
# rotate through the possible contrasts of the primary factor
# write out a sorted table of results with the contrast as a suffix
# add contrast specific plots to the device
- for (i in seq_len(n-1)) {
- ref <- allLevels[i]
- contrastLevels <- allLevels[(i+1):n]
- for (lvl in contrastLevels) {
- res <- results(dds, contrast=c(primaryFactor, lvl, ref),
- cooksCutoff=cooksCutoff,
- independentFiltering=independentFiltering)
- resSorted <- res[order(res$padj),]
- outDF <- as.data.frame(resSorted)
- outDF$geneID <- rownames(outDF)
- outDF <- outDF[,c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
- filename <- paste0(primaryFactor,"_",lvl,"_vs_",ref)
- write.table(outDF, file=filename, sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
- if (independentFiltering) {
+ for (i in seq_len(n - 1)) {
+ ref <- all_levels[i]
+ contrast_levels <- all_levels[(i + 1):n]
+ for (lvl in contrast_levels) {
+ res <- results(
+ dds,
+ contrast = c(primary_factor, lvl, ref),
+ cooksCutoff = cooks_cutoff,
+ independentFiltering = independent_filtering
+ )
+ res_sorted <- res[order(res$padj), ]
+ out_df <- as.data.frame(res_sorted)
+ out_df$geneID <- rownames(out_df) # nolint
+ out_df <- out_df[, c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
+ filename <- paste0(primary_factor, "_", lvl, "_vs_", ref)
+ write.table(out_df, file = filename, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
+ if (independent_filtering) {
threshold <- unname(attr(res, "filterThreshold"))
} else {
threshold <- 0
}
- title_suffix <- paste0(primaryFactor,": ",lvl," vs ",ref)
+ title_suffix <- paste0(primary_factor, ": ", lvl, " vs ", ref)
if (!is.null(opt$plots)) {
- generateSpecificPlots(res, threshold, title_suffix)
+ generate_specific_plots(res, threshold, title_suffix)
}
}
}
@@ -378,4 +396,3 @@
cat("Session information:\n\n")
sessionInfo()
-
diff -r 71bacea10eee -r de44f8eff84a deseq2.xml
--- a/deseq2.xml Sat Feb 29 17:08:08 2020 -0500
+++ b/deseq2.xml Wed Nov 25 18:36:55 2020 +0000
@@ -176,7 +176,7 @@
label="Output VST normalized table" />
+ help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
+ filter genes which contain a Cook’s distance above a cutoff" />
+ help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
diff -r 71bacea10eee -r de44f8eff84a get_deseq_dataset.R
--- a/get_deseq_dataset.R Sat Feb 29 17:08:08 2020 -0500
+++ b/get_deseq_dataset.R Wed Nov 25 18:36:55 2020 +0000
@@ -1,76 +1,80 @@
-get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) {
+get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) {
dir <- ""
- if (!is.null(header)) {
- hasHeader <- TRUE
- } else {
- hasHeader <- FALSE
- }
-
- if (!is.null(tximport)) {
+ has_header <- !is.null(header)
+ use_txi <- !is.null(tximport)
+ if (use_txi) {
if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport")
if (tolower(file_ext(tx2gene)) == "gff") {
- gffFile <-tx2gene
+ gff_file <- tx2gene
} else {
- gffFile <- NULL
- tx2gene <- read.table(tx2gene, header=hasHeader)
+ gff_file <- NULL
+ tx2gene <- read.table(tx2gene, header = has_header)
}
- useTXI <- TRUE
- } else {
- useTXI <- FALSE
}
- if (!useTXI & hasHeader) {
- countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)})
+ if (!use_txi & has_header) {
+ countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1)
tbl <- do.call("cbind", countfiles)
- colnames(tbl) <- rownames(sampleTable) # take sample ids from header
+ colnames(tbl) <- rownames(sample_table) # take sample ids from header
# check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
- oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual",
- "not_aligned", "alignment_not_unique")
- specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames
- tbl <- tbl[!specialRows, , drop = FALSE]
+ old_special_names <- c(
+ "no_feature",
+ "ambiguous",
+ "too_low_aQual",
+ "not_aligned",
+ "alignment_not_unique"
+ )
+ special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names
+ tbl <- tbl[!special_rows, , drop = FALSE]
- dds <- DESeqDataSetFromMatrix(countData = tbl,
- colData = subset(sampleTable, select=-(filename)),
- design = designFormula)
- } else if (!useTXI & !hasHeader) {
+ dds <- DESeqDataSetFromMatrix(
+ countData = tbl,
+ colData = subset(sample_table, select = -filename),
+ design = design_formula
+ )
+ } else if (!use_txi & !has_header) {
# construct the object from HTSeq files
- dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
- directory = dir,
- design = designFormula)
- colnames(dds) <- row.names(sampleTable)
+ dds <- DESeqDataSetFromHTSeqCount(
+ sampleTable = sample_table,
+ directory = dir,
+ design = design_formula
+ )
+ colnames(dds) <- row.names(sample_table)
} else {
# construct the object using tximport
library("tximport")
- txiFiles <- as.character(sampleTable$filename)
- labs <- row.names(sampleTable)
- names(txiFiles) <- labs
- if (!is.null(gffFile)) {
+ txi_files <- as.character(sample_table$filename)
+ labs <- row.names(sample_table)
+ names(txi_files) <- labs
+ if (!is.null(gff_file)) {
# first need to make the tx2gene table
# this takes ~2-3 minutes using Bioconductor functions
suppressPackageStartupMessages({
library("GenomicFeatures")
})
- txdb <- makeTxDbFromGFF(gffFile)
+ txdb <- makeTxDbFromGFF(gff_file)
k <- keys(txdb, keytype = "TXNAME")
- tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME")
- # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name)
- tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME)
+ tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME")
+ # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name)
+ tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint
}
- try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene))
+ try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene))
if (!exists("txi")) {
# Remove version from transcript IDs in tx2gene...
- tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME)
- # ...and in txiFiles
- txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE)
+ tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint
+ # ...and in txi_files
+ txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE)
}
- dds <- DESeqDataSetFromTximport(txi,
- subset(sampleTable, select=-c(filename)),
- designFormula)
+ dds <- DESeqDataSetFromTximport(
+ txi,
+ subset(sample_table, select = -c(filename)),
+ design_formula
+ )
}
return(dds)
}