| Previous changeset 24:71bacea10eee (2020-02-29) Next changeset 26:6a3a025714d3 (2021-01-08) |
|
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b" |
|
modified:
deseq2.R deseq2.xml get_deseq_dataset.R |
| b |
| diff -r 71bacea10eee -r de44f8eff84a deseq2.R --- a/deseq2.R Sat Feb 29 17:08:08 2020 -0500 +++ b/deseq2.R Wed Nov 25 18:36:55 2020 +0000 |
| [ |
| b'@@ -31,7 +31,9 @@\n # 3 "mean"\n \n # setup R error handling to go to stderr\n-options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )\n+options(show.error.messages = F, error = function() {\n+ cat(geterrmessage(), file = stderr()); q("no", 1, F)\n+})\n \n # we need that to not crash galaxy with an UTF8 error on German LC settings.\n loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n@@ -55,50 +57,46 @@\n "header", "H", 0, "logical",\n "factors", "f", 1, "character",\n "files_to_labels", "l", 1, "character",\n- "plots" , "p", 1, "character",\n+ "plots", "p", 1, "character",\n "tximport", "i", 0, "logical",\n "txtype", "y", 1, "character",\n "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF/GFF3 file\n "esf", "e", 1, "character",\n "fit_type", "t", 1, "integer",\n "many_contrasts", "m", 0, "logical",\n- "outlier_replace_off" , "a", 0, "logical",\n- "outlier_filter_off" , "b", 0, "logical",\n+ "outlier_replace_off", "a", 0, "logical",\n+ "outlier_filter_off", "b", 0, "logical",\n "auto_mean_filter_off", "c", 0, "logical",\n- "beta_prior_off", "d", 0, "logical"),\n- byrow=TRUE, ncol=4)\n+ "beta_prior_off", "d", 0, "logical"\n+), byrow = TRUE, ncol = 4)\n opt <- getopt(spec)\n \n # if help was asked for print a friendly message\n # and exit with a non-zero error code\n if (!is.null(opt$help)) {\n- cat(getopt(spec, usage=TRUE))\n- q(status=1)\n+ cat(getopt(spec, usage = TRUE))\n+ q(status = 1)\n }\n \n # enforce the following required arguments\n if (is.null(opt$outfile)) {\n cat("\'outfile\' is required\\n")\n- q(status=1)\n+ q(status = 1)\n }\n if (is.null(opt$factors)) {\n cat("\'factors\' is required\\n")\n- q(status=1)\n+ q(status = 1)\n }\n \n-verbose <- if (is.null(opt$quiet)) {\n- TRUE\n-} else {\n- FALSE\n+verbose <- is.null(opt$quiet)\n+\n+source_local <- function(fname) {\n+ argv <- commandArgs(trailingOnly = FALSE)\n+ base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))\n+ source(paste(base_dir, fname, sep = "/"))\n }\n \n-source_local <- function(fname){\n- argv <- commandArgs(trailingOnly = FALSE)\n- base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))\n- source(paste(base_dir, fname, sep="/"))\n-}\n-\n-source_local(\'get_deseq_dataset.R\')\n+source_local("get_deseq_dataset.R")\n \n suppressPackageStartupMessages({\n library("DESeq2")\n@@ -109,102 +107,116 @@\n if (opt$cores > 1) {\n library("BiocParallel")\n register(MulticoreParam(opt$cores))\n- parallel = TRUE\n+ parallel <- TRUE\n } else {\n- parallel = FALSE\n+ parallel <- FALSE\n }\n \n # build or read sample table\n \n-trim <- function (x) gsub("^\\\\s+|\\\\s+$", "", x)\n+trim <- function(x) gsub("^\\\\s+|\\\\s+$", "", x)\n \n # switch on if \'factors\' was provided:\n library("rjson")\n parser <- newJSONParser()\n parser$addData(opt$factors)\n-factorList <- parser$getObject()\n+factor_list <- parser$getObject()\n filenames_to_labels <- fromJSON(opt$files_to_labels)\n-factors <- sapply(factorList, function(x) x[[1]])\n-primaryFactor <- factors[1]\n-filenamesIn <- unname(unlist(factorList[[1]][[2]]))\n-labs = unname(unlist(filenames_to_labels[basename(filenamesIn)]))\n-sampleTable <- data.frame(sample=basename(filenamesIn),\n- filename=filenamesIn,\n- row.names=filenamesIn,\n- stringsAsFactors=FALSE)\n-for (factor in factorList) {\n- factorName <- trim(factor[[1]])\n- sampleTable[[factorName]] <- character(nrow(sampleTable))\n+factors <- sapply(factor_list, function(x) x[[1]])\n+primary_factor <- factors[1]\n+filenames_in <- unname(unlist(factor_list[[1]][[2]]))\n+labs <- unname(unlist(filenames_to_labels[basename(filenames_in)]))\n+sample_table <- data.frame(\n+ sample = basename(filenames_in),\n+ filename = filenames_in,\n+ row.names = filenames_in,\n+ stringsAsFactors = FALSE\n+)\n+for (factor in factor_list) {\n+ factor_name <- trim(factor[[1]])\n+ sample_table[[factor_name]] <- character(nrow(sample_table))\n lvls <- sapply(factor[[2]], function('..b'is.null(opt$many_contrasts)) {\n # only contrast the first and second level of the primary factor\n- ref <- allLevels[1]\n- lvl <- allLevels[2]\n- res <- results(dds, contrast=c(primaryFactor, lvl, ref),\n- cooksCutoff=cooksCutoff,\n- independentFiltering=independentFiltering)\n+ ref <- all_levels[1]\n+ lvl <- all_levels[2]\n+ res <- results(\n+ dds,\n+ contrast = c(primary_factor, lvl, ref),\n+ cooksCutoff = cooks_cutoff,\n+ independentFiltering = independent_filtering\n+ )\n if (verbose) {\n cat("summary of results\\n")\n- cat(paste0(primaryFactor,": ",lvl," vs ",ref,"\\n"))\n+ cat(paste0(primary_factor, ": ", lvl, " vs ", ref, "\\n"))\n print(summary(res))\n }\n- resSorted <- res[order(res$padj),]\n- outDF <- as.data.frame(resSorted)\n- outDF$geneID <- rownames(outDF)\n- outDF <- outDF[,c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]\n+ res_sorted <- res[order(res$padj), ]\n+ out_df <- as.data.frame(res_sorted)\n+ out_df$geneID <- rownames(out_df) # nolint\n+ out_df <- out_df[, c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]\n filename <- opt$outfile\n- write.table(outDF, file=filename, sep="\\t", quote=FALSE, row.names=FALSE, col.names=FALSE)\n- if (independentFiltering) {\n+ write.table(out_df, file = filename, sep = "\\t", quote = FALSE, row.names = FALSE, col.names = FALSE)\n+ if (independent_filtering) {\n threshold <- unname(attr(res, "filterThreshold"))\n } else {\n threshold <- 0\n }\n- title_suffix <- paste0(primaryFactor,": ",lvl," vs ",ref)\n+ title_suffix <- paste0(primary_factor, ": ", lvl, " vs ", ref)\n if (!is.null(opt$plots)) {\n- generateSpecificPlots(res, threshold, title_suffix)\n+ generate_specific_plots(res, threshold, title_suffix)\n }\n } else {\n # rotate through the possible contrasts of the primary factor\n # write out a sorted table of results with the contrast as a suffix\n # add contrast specific plots to the device\n- for (i in seq_len(n-1)) {\n- ref <- allLevels[i]\n- contrastLevels <- allLevels[(i+1):n]\n- for (lvl in contrastLevels) {\n- res <- results(dds, contrast=c(primaryFactor, lvl, ref),\n- cooksCutoff=cooksCutoff,\n- independentFiltering=independentFiltering)\n- resSorted <- res[order(res$padj),]\n- outDF <- as.data.frame(resSorted)\n- outDF$geneID <- rownames(outDF)\n- outDF <- outDF[,c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]\n- filename <- paste0(primaryFactor,"_",lvl,"_vs_",ref)\n- write.table(outDF, file=filename, sep="\\t", quote=FALSE, row.names=FALSE, col.names=FALSE)\n- if (independentFiltering) {\n+ for (i in seq_len(n - 1)) {\n+ ref <- all_levels[i]\n+ contrast_levels <- all_levels[(i + 1):n]\n+ for (lvl in contrast_levels) {\n+ res <- results(\n+ dds,\n+ contrast = c(primary_factor, lvl, ref),\n+ cooksCutoff = cooks_cutoff,\n+ independentFiltering = independent_filtering\n+ )\n+ res_sorted <- res[order(res$padj), ]\n+ out_df <- as.data.frame(res_sorted)\n+ out_df$geneID <- rownames(out_df) # nolint\n+ out_df <- out_df[, c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]\n+ filename <- paste0(primary_factor, "_", lvl, "_vs_", ref)\n+ write.table(out_df, file = filename, sep = "\\t", quote = FALSE, row.names = FALSE, col.names = FALSE)\n+ if (independent_filtering) {\n threshold <- unname(attr(res, "filterThreshold"))\n } else {\n threshold <- 0\n }\n- title_suffix <- paste0(primaryFactor,": ",lvl," vs ",ref)\n+ title_suffix <- paste0(primary_factor, ": ", lvl, " vs ", ref)\n if (!is.null(opt$plots)) {\n- generateSpecificPlots(res, threshold, title_suffix)\n+ generate_specific_plots(res, threshold, title_suffix)\n }\n }\n }\n@@ -378,4 +396,3 @@\n cat("Session information:\\n\\n")\n \n sessionInfo()\n-\n' |
| b |
| diff -r 71bacea10eee -r de44f8eff84a deseq2.xml --- a/deseq2.xml Sat Feb 29 17:08:08 2020 -0500 +++ b/deseq2.xml Wed Nov 25 18:36:55 2020 +0000 |
| b |
| @@ -176,7 +176,7 @@ label="Output VST normalized table" /> <param name="many_contrasts" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)" - help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> + help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> <param name="esf" type="select" label="(Optional) Method for estimateSizeFactors" help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq. The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples. @@ -203,10 +203,10 @@ <param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Turn off outliers filtering (only affects with >2 replicates)" help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically - filter genes which contain a Cook’s distance above a cutoff" /> + filter genes which contain a Cook’s distance above a cutoff" /> <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Turn off independent filtering" - help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> + help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> </inputs> <outputs> <data name="deseq_out" format="tabular" label="DESeq2 result file on ${on_string}"> |
| b |
| diff -r 71bacea10eee -r de44f8eff84a get_deseq_dataset.R --- a/get_deseq_dataset.R Sat Feb 29 17:08:08 2020 -0500 +++ b/get_deseq_dataset.R Wed Nov 25 18:36:55 2020 +0000 |
| [ |
| @@ -1,76 +1,80 @@ -get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) { +get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) { dir <- "" - if (!is.null(header)) { - hasHeader <- TRUE - } else { - hasHeader <- FALSE - } - - if (!is.null(tximport)) { + has_header <- !is.null(header) + use_txi <- !is.null(tximport) + if (use_txi) { if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") if (tolower(file_ext(tx2gene)) == "gff") { - gffFile <-tx2gene + gff_file <- tx2gene } else { - gffFile <- NULL - tx2gene <- read.table(tx2gene, header=hasHeader) + gff_file <- NULL + tx2gene <- read.table(tx2gene, header = has_header) } - useTXI <- TRUE - } else { - useTXI <- FALSE } - if (!useTXI & hasHeader) { - countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) + if (!use_txi & has_header) { + countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1) tbl <- do.call("cbind", countfiles) - colnames(tbl) <- rownames(sampleTable) # take sample ids from header + colnames(tbl) <- rownames(sample_table) # take sample ids from header # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) - oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", - "not_aligned", "alignment_not_unique") - specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames - tbl <- tbl[!specialRows, , drop = FALSE] + old_special_names <- c( + "no_feature", + "ambiguous", + "too_low_aQual", + "not_aligned", + "alignment_not_unique" + ) + special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names + tbl <- tbl[!special_rows, , drop = FALSE] - dds <- DESeqDataSetFromMatrix(countData = tbl, - colData = subset(sampleTable, select=-(filename)), - design = designFormula) - } else if (!useTXI & !hasHeader) { + dds <- DESeqDataSetFromMatrix( + countData = tbl, + colData = subset(sample_table, select = -filename), + design = design_formula + ) + } else if (!use_txi & !has_header) { # construct the object from HTSeq files - dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, - directory = dir, - design = designFormula) - colnames(dds) <- row.names(sampleTable) + dds <- DESeqDataSetFromHTSeqCount( + sampleTable = sample_table, + directory = dir, + design = design_formula + ) + colnames(dds) <- row.names(sample_table) } else { # construct the object using tximport library("tximport") - txiFiles <- as.character(sampleTable$filename) - labs <- row.names(sampleTable) - names(txiFiles) <- labs - if (!is.null(gffFile)) { + txi_files <- as.character(sample_table$filename) + labs <- row.names(sample_table) + names(txi_files) <- labs + if (!is.null(gff_file)) { # first need to make the tx2gene table # this takes ~2-3 minutes using Bioconductor functions suppressPackageStartupMessages({ library("GenomicFeatures") }) - txdb <- makeTxDbFromGFF(gffFile) + txdb <- makeTxDbFromGFF(gff_file) k <- keys(txdb, keytype = "TXNAME") - tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME") - # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name) - tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME) + tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME") + # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name) + tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint } - try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)) + try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene)) if (!exists("txi")) { # Remove version from transcript IDs in tx2gene... - tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME) - # ...and in txiFiles - txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE) + tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint + # ...and in txi_files + txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE) } - dds <- DESeqDataSetFromTximport(txi, - subset(sampleTable, select=-c(filename)), - designFormula) + dds <- DESeqDataSetFromTximport( + txi, + subset(sample_table, select = -c(filename)), + design_formula + ) } return(dds) } |