Galaxy |

Changeset 18:3bf1b3ec1ddf (2018-11-16)

Previous changeset 17:d9e5cadc7f0b (2018-09-05) | Next changeset 19:c56e0689e46e (2018-12-04)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 448dccb0c02aba00d8301247b0f0f406ab3d4fa2

modified:
deseq2.R
deseq2.xml

diff -r d9e5cadc7f0b -r 3bf1b3ec1ddf deseq2.R
--- a/deseq2.R Wed Sep 05 15:54:03 2018 -0400
+++ b/deseq2.R Fri Nov 16 14:47:19 2018 -0500

[

@@ -49,6 +49,8 @@
   "batch_factors", "", 1, "character",
   "outfile", "o", 1, "character",
   "countsfile", "n", 1, "character",
+  "rlogfile", "r", 1, "character",
+  "vstfile", "v", 1, "character",
   "header", "H", 0, "logical",
   "factors", "f", 1, "character",
   "files_to_labels", "l", 1, "character",
@@ -56,6 +58,7 @@
   "tximport", "i", 0, "logical",
   "txtype", "y", 1, "character",
   "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
+  "esf", "e", 1, "character",
   "fit_type", "t", 1, "integer",
   "many_contrasts", "m", 0, "logical",
   "outlier_replace_off" , "a", 0, "logical",
@@ -188,7 +191,10 @@
}

dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula=designFormula, tximport=opt$tximport, txtype=opt$txtype, tx2gene=opt$tx2gene)
-
+# estimate size factors for the chosen method
+if(!is.null(opt$esf)){
+    dds <- estimateSizeFactors(dds, type=opt$esf)
+}
apply_batch_factors <- function (dds, batch_factors) {
   rownames(batch_factors) <- batch_factors$identifier
   batch_factors <- subset(batch_factors, select = -c(identifier, condition))
@@ -284,6 +290,19 @@
     write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE)
}

+if (!is.null(opt$rlogfile)) {
+    rLogNormalized <-rlogTransformation(dds)
+    rLogNormalizedMat <- assay(rLogNormalized)
+    write.table(rLogNormalizedMat, file=opt$rlogfile, sep="\t", col.names=NA, quote=FALSE)
+}
+
+if (!is.null(opt$vstfile)) {
+    vstNormalized<-varianceStabilizingTransformation(dds)
+    vstNormalizedMat <- assay(vstNormalized)
+    write.table(vstNormalizedMat, file=opt$vstfile, sep="\t", col.names=NA, quote=FALSE)
+}
+
+
if (is.null(opt$many_contrasts)) {
   # only contrast the first and second level of the primary factor
   ref <- allLevels[1]

diff -r d9e5cadc7f0b -r 3bf1b3ec1ddf deseq2.xml
--- a/deseq2.xml Wed Sep 05 15:54:03 2018 -0400
+++ b/deseq2.xml Fri Nov 16 14:47:19 2018 -0500

b'@@ -1,4 +1,4 @@\n-<tool id="deseq2" name="DESeq2" version="2.11.40.2">\n+<tool id="deseq2" name="DESeq2" version="2.11.40.3">\n <description>Determines differentially expressed features from count tables</description>\n <requirements>\n <requirement type="package" version="1.18.1">bioconductor-deseq2</requirement>\n@@ -43,6 +43,12 @@\n #if $normCounts:\n -n \'$counts_out\'\n #end if\n+ #if $normRLog:\n+ -r \'$rlog_out\'\n+ #end if\n+ #if $normVST:\n+ -v \'$vst_out\'\n+ #end if\n #set $filename_to_element_identifiers = {}\n #set $temp_factor_names = list()\n #for $factor in $rep_factorName:\n@@ -63,8 +69,11 @@\n \n -f \'#echo json.dumps(temp_factor_names)#\'\n -l \'#echo json.dumps(filename_to_element_identifiers)#\'\n+ #if $esf:\n+ -e $esf\n+ #end if\n -t $fit_type\n- #if $batch_factors\n+ #if $batch_factors:\n --batch_factors \'$batch_factors\'\n #end if\n #if $outlier_replace_off:\n@@ -142,9 +151,26 @@\n help="output an additional PDF files" />\n <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"\n label="Output normalized counts table" />\n+ <param name="normRLog" type="boolean" truevalue="1" falsevalue="0" checked="false"\n+ label="Output rLog normalized table" />\n+ <param name="normVST" type="boolean" truevalue="1" falsevalue="0" checked="false"\n+ label="Output VST normalized table" />\n <param name="many_contrasts" type="boolean" truevalue="1" falsevalue="0" checked="false"\n label="Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)"\n help=" DESeq2 performs independent \xef\xac\x81ltering by default using the mean of normalized counts as a \xef\xac\x81lter statistic" />\n+ <param name="esf" type="select" label="(Optional) Method for estimateSizeFactors" \n+ help="Method for estimation: either \'ratio\', \'poscounts\', or \'iterate\'. \'ratio\' uses the standard median ratio method introduced in DESeq. 
\n+ The size factor is the median ratio of the sample over a \'pseudosample\': for each gene, the geometric mean of all samples. \n+ \'poscounts\' and \'iterate\' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem \n+ for the default method, as the geometric mean becomes zero, and the ratio undefined). The \'poscounts\' estimator deals with a gene with \n+ some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of \n+ use cases with Paul McMurdie\'s phyloseq package for metagenomic samples. The \'iterate\' estimator iterates between estimating the dispersion \n+ with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model.">\n+ <option value="" selected="true">No Selection (use default)</option>\n+ <option value="ratio">ratio</option>\n+ <option value="poscounts">poscounts</option>\n+ <option value="iterate">iterate</option>\n+ </param>\n <param name="fit_type" type="select" label="Fit type">\n <option value="1" selected="true">parametric</option>\n <option value="2">local</option>\n@@ -180,10 +206,16 @@\n <data format="tabular" name="counts_out" label="Normalized counts file on ${on_string}">\n <filter>normCounts == True</filter>\n </data>\n+ <data format="tabular" name="rlog_out" label="rLog-Normalized counts file on ${on_string}">\n+ <filter>normRLog == True</filter>\n+ </data>\n+ <data format="tabular" name="vst_out" label="VST-Normalized counts file on ${on_string}">\n+ <filter>normVST == True</filter>\n+ </data>\n </outputs>\n <tests>\n \n- <test expect_num_outputs="2">\n+ <test expect_num_outputs="4">\n <repeat name="rep_factorName">\n <param name="factorName" value="Treatment"/>\n <repeat name="rep_factorLevel">\n@@ -247,12 +293,26 @@\n <param name="header" value="False"/>\n <param name="pdf" value="False"/>\n <param name="normCounts" value="True"/>\n+ <param name="normRLog" 
value="True"/>\n+ <param name="normVST" value="True"/>\n <output name="counts_out">\n <assert_contents>\n <has_text_matching expression="GSM461176_untreat_single.counts.noheader\\tGSM461177_untreat_paired.counts.noheader\\tGSM461178_untreat_paired.counts.noheader\\tGSM461182_untreat_single.counts.noheader\\tGSM461179_treat_single.counts.noheader\\tGSM461180_treat_paired.counts.noheader\\tGSM461181_treat_paired.counts.noheader" />\n <has_text_matching expression="FBgn0000003\\t0\\t0\\t0\\t0\\t0\\t0\\t0" />\n </assert_contents>\n </output>\n+ <output name="rlog_out">\n+ <assert_contents>\n+ <has_text_matching expression="GSM461176_untreat_single.counts.noheader\\tGSM461177_untreat_paired.counts.noheader\\tGSM461178_untreat_paired.counts.noheader\\tGSM461182_untreat_single.counts.noheader\\tGSM461179_treat_single.counts.noheader\\tGSM461180_treat_paired.counts.noheader\\tGSM461181_treat_paired.counts.noheader" />\n+ <has_text_matching expression="FBgn0000003\\t0\\t0\\t0\\t0\\t0\\t0\\t0" />\n+ </assert_contents>\n+ </output>\n+ <output name="vst_out">\n+ <assert_contents>\n+ <has_text_matching expression="GSM461176_untreat_single.counts.noheader\\tGSM461177_untreat_paired.counts.noheader\\tGSM461178_untreat_paired.counts.noheader\\tGSM461182_untreat_single.counts.noheader\\tGSM461179_treat_single.counts.noheader\\tGSM461180_treat_paired.counts.noheader\\tGSM461181_treat_paired.counts.noheader" />\n+ <has_text_matching expression="FBgn0000003\\t5.*\\t5.*\\t5.*\\t5.*\\t5.*\\t5.*\\t5.*" />\n+ </assert_contents>\n+ </output>\n <output name="deseq_out" >\n <assert_contents>\n <has_text_matching expression="FBgn0003360\\t1933.9504.*\\t-2.8399.*\\t0.1309.*-21.6851.*2.831.*8.024" />\n'