Mercurial > repos > iuc > deseq2

--- a/deseq2.R	Wed Feb 21 00:06:27 2018 -0500
+++ b/deseq2.R	Thu Apr 12 17:29:45 2018 -0400
@@ -3,13 +3,10 @@
 # A command-line interface to DESeq2 for use with Galaxy
 # written by Bjoern Gruening and modified by Michael Love 2016.03.30
 #
-# one of these arguments is required:
+# This argument is required:
 #
 #   'factors' a JSON list object from Galaxy
 #
-#   'sample_table' is a sample table as described in ?DESeqDataSetFromHTSeqCount
-#   with columns: sample name, filename, then factors (variables)
-#
 # the output file has columns:
 #
 #   baseMean (mean normalized count)
@@ -19,8 +16,8 @@
 #   pvalue (p-value from comparison of Wald statistic to a standard Normal)
 #   padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter)
 #
-# the first variable in 'factors' and first column in 'sample_table' will be the primary factor.
-# the levels of the primary factor are used in the order of appearance in factors or in sample_table.
+# the first variable in 'factors' will be the primary factor.
+# the levels of the primary factor are used in the order of appearance in factors.
 #
 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile'
 #
@@ -54,7 +51,6 @@
   "factors", "f", 1, "character",
   "files_to_labels", "l", 1, "character",
   "plots" , "p", 1, "character",
-  "sample_table", "s", 1, "character",
   "tximport", "i", 0, "logical",
   "txtype", "y", 1, "character",
   "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
@@ -79,8 +75,8 @@
   cat("'outfile' is required\n")
   q(status=1)
 }
-if (is.null(opt$sample_table) & is.null(opt$factors)) {
-  cat("'factors' or 'sample_table' is required\n")
+if (is.null(opt$factors)) {
+  cat("'factors' is required\n")
   q(status=1)
 }

@@ -114,43 +110,30 @@
 trim <- function (x) gsub("^\\s+|\\s+$", "", x)

 # switch on if 'factors' was provided:
-if (!is.null(opt$factors)) {
-  library("rjson")
-  parser <- newJSONParser()
-  parser$addData(opt$factors)
-  factorList <- parser$getObject()
-  filenames_to_labels <- fromJSON(opt$files_to_labels)
-  factors <- sapply(factorList, function(x) x[[1]])
-  primaryFactor <- factors[1]
-  filenamesIn <- unname(unlist(factorList[[1]][[2]]))
-  labs = unname(unlist(filenames_to_labels[basename(filenamesIn)]))
-  sampleTable <- data.frame(sample=basename(filenamesIn),
-                            filename=filenamesIn,
-                            row.names=filenamesIn,
-                            stringsAsFactors=FALSE)
-  for (factor in factorList) {
-    factorName <- trim(factor[[1]])
-    sampleTable[[factorName]] <- character(nrow(sampleTable))
-    lvls <- sapply(factor[[2]], function(x) names(x))
-    for (i in seq_along(factor[[2]])) {
-      files <- factor[[2]][[i]][[1]]
-      sampleTable[files,factorName] <- trim(lvls[i])
-    }
-    sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
+library("rjson")
+parser <- newJSONParser()
+parser$addData(opt$factors)
+factorList <- parser$getObject()
+filenames_to_labels <- fromJSON(opt$files_to_labels)
+factors <- sapply(factorList, function(x) x[[1]])
+primaryFactor <- factors[1]
+filenamesIn <- unname(unlist(factorList[[1]][[2]]))
+labs = unname(unlist(filenames_to_labels[basename(filenamesIn)]))
+sampleTable <- data.frame(sample=basename(filenamesIn),
+                          filename=filenamesIn,
+                          row.names=filenamesIn,
+                          stringsAsFactors=FALSE)
+for (factor in factorList) {
+  factorName <- trim(factor[[1]])
+  sampleTable[[factorName]] <- character(nrow(sampleTable))
+  lvls <- sapply(factor[[2]], function(x) names(x))
+  for (i in seq_along(factor[[2]])) {
+    files <- factor[[2]][[i]][[1]]
+    sampleTable[files,factorName] <- trim(lvls[i])
   }
-  rownames(sampleTable) <- labs
-} else {
-  # read the sample_table argument
-  # this table is described in ?DESeqDataSet
-  # one column for the sample name, one for the filename, and
-  # the remaining columns for factors in the analysis
-  sampleTable <- read.delim(opt$sample_table, stringsAsFactors=FALSE)
-  factors <- colnames(sampleTable)[-c(1:2)]
-  for (factor in factors) {
-    lvls <- unique(as.character(sampleTable[[factor]]))
-    sampleTable[[factor]] <- factor(sampleTable[[factor]], levels=lvls)
-  }
+  sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
 }
+rownames(sampleTable) <- labs

 primaryFactor <- factors[1]
 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + ")))
@@ -216,13 +199,8 @@
   cat("\n---------------------\n")
 }

-# if JSON input from Galaxy, path is absolute
-# otherwise, from sample_table, assume it is relative
-dir <- if (is.null(opt$factors)) {
-  "."
-} else {
-  ""
-}
+# For JSON input from Galaxy, path is absolute
+dir <- ""

 if (!useTXI) {
   # construct the object from HTSeq files
--- a/deseq2.xml	Wed Feb 21 00:06:27 2018 -0500
+++ b/deseq2.xml	Thu Apr 12 17:29:45 2018 -0400
@@ -105,7 +105,7 @@

         <conditional name="tximport">
             <param name="tximport_selector" type="select" label="Choice of Input data">
-                <option value="count" selected="True">Count data (e.g. from HTSeq-count or featureCounts)</option>
+                <option value="count" selected="True">Count data (e.g. from HTSeq-count, featureCounts or StringTie)</option>
                 <option value="tximport">TPM values (e.g. from kallisto, sailfish or salmon)</option>
             </param>
             <when value="tximport">
@@ -226,7 +226,7 @@

 **Count Files**

-DESeq2_ takes count tables generated from **featureCounts** or **HTSeq-count** as input. Count tables must be generated for each sample individually. DESeq2 is capable of handling multiple factors that affect your experiment. The first factor you input is considered as the primary factor that affects gene expressions. Optionally, you can input one or more secondary factors that might influence your experiment. But the final output will be changes in genes due to primary factor in presence of secondary factors. Each factor has two levels/states. You need to select appropriate count table from your history for each factor level.
+DESeq2_ takes count tables generated from **featureCounts**, **HTSeq-count** or **StringTie** as input. Count tables must be generated for each sample individually, should have no header rows, and should list their rows in the same order in every file. DESeq2 is capable of handling multiple factors that affect your experiment. The first factor you input is considered the primary factor that affects gene expression. Optionally, you can input one or more secondary factors that might influence your experiment. The final output, however, will be the changes in gene expression due to the primary factor in the presence of the secondary factors. Each factor has two levels/states. You need to select the appropriate count tables from your history for each factor level.

 The following table gives some examples of factors and their levels: