Repository 'deseq2'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/deseq2

Changeset 18:3bf1b3ec1ddf (2018-11-16)
Previous changeset 17:d9e5cadc7f0b (2018-09-05) Next changeset 19:c56e0689e46e (2018-12-04)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 448dccb0c02aba00d8301247b0f0f406ab3d4fa2
modified:
deseq2.R
deseq2.xml
b
diff -r d9e5cadc7f0b -r 3bf1b3ec1ddf deseq2.R
--- a/deseq2.R Wed Sep 05 15:54:03 2018 -0400
+++ b/deseq2.R Fri Nov 16 14:47:19 2018 -0500
[
@@ -49,6 +49,8 @@
   "batch_factors", "", 1, "character",
   "outfile", "o", 1, "character",
   "countsfile", "n", 1, "character",
+  "rlogfile", "r", 1, "character",
+  "vstfile", "v", 1, "character",
   "header", "H", 0, "logical",
   "factors", "f", 1, "character",
   "files_to_labels", "l", 1, "character",
@@ -56,6 +58,7 @@
   "tximport", "i", 0, "logical",
   "txtype", "y", 1, "character",
   "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
+  "esf", "e", 1, "character",
   "fit_type", "t", 1, "integer",
   "many_contrasts", "m", 0, "logical",
   "outlier_replace_off" , "a", 0, "logical",
@@ -188,7 +191,10 @@
 }
 
 dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula=designFormula, tximport=opt$tximport, txtype=opt$txtype, tx2gene=opt$tx2gene)
-
+# estimate size factors for the chosen method
+if(!is.null(opt$esf)){
+    dds <- estimateSizeFactors(dds, type=opt$esf)
+}
 apply_batch_factors <- function (dds, batch_factors) {
   rownames(batch_factors) <- batch_factors$identifier
   batch_factors <- subset(batch_factors, select = -c(identifier, condition))
@@ -284,6 +290,19 @@
     write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE)
 }
 
+if (!is.null(opt$rlogfile)) {
+    rLogNormalized <-rlogTransformation(dds)
+    rLogNormalizedMat <- assay(rLogNormalized)
+    write.table(rLogNormalizedMat, file=opt$rlogfile, sep="\t", col.names=NA, quote=FALSE)
+}
+
+if (!is.null(opt$vstfile)) {
+    vstNormalized<-varianceStabilizingTransformation(dds)
+    vstNormalizedMat <- assay(vstNormalized)
+    write.table(vstNormalizedMat, file=opt$vstfile, sep="\t", col.names=NA, quote=FALSE)
+}
+
+
 if (is.null(opt$many_contrasts)) {
   # only contrast the first and second level of the primary factor
   ref <- allLevels[1]
b
diff -r d9e5cadc7f0b -r 3bf1b3ec1ddf deseq2.xml
--- a/deseq2.xml Wed Sep 05 15:54:03 2018 -0400
+++ b/deseq2.xml Fri Nov 16 14:47:19 2018 -0500
b
b'@@ -1,4 +1,4 @@\n-<tool id="deseq2" name="DESeq2" version="2.11.40.2">\n+<tool id="deseq2" name="DESeq2" version="2.11.40.3">\n     <description>Determines differentially expressed features from count tables</description>\n     <requirements>\n         <requirement type="package" version="1.18.1">bioconductor-deseq2</requirement>\n@@ -43,6 +43,12 @@\n     #if $normCounts:\n         -n \'$counts_out\'\n     #end if\n+    #if $normRLog:\n+        -r \'$rlog_out\'\n+    #end if\n+    #if $normVST:\n+        -v \'$vst_out\'\n+    #end if\n     #set $filename_to_element_identifiers = {}\n     #set $temp_factor_names = list()\n     #for $factor in $rep_factorName:\n@@ -63,8 +69,11 @@\n \n     -f \'#echo json.dumps(temp_factor_names)#\'\n     -l \'#echo json.dumps(filename_to_element_identifiers)#\'\n+    #if $esf:\n+        -e $esf\n+    #end if\n     -t $fit_type\n-    #if $batch_factors\n+    #if $batch_factors:\n         --batch_factors \'$batch_factors\'\n     #end if\n     #if $outlier_replace_off:\n@@ -142,9 +151,26 @@\n             help="output an additional PDF files" />\n         <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"\n             label="Output normalized counts table" />\n+        <param name="normRLog" type="boolean" truevalue="1" falsevalue="0" checked="false"\n+            label="Output rLog normalized table" />\n+        <param name="normVST" type="boolean" truevalue="1" falsevalue="0" checked="false"\n+            label="Output VST normalized table" />\n         <param name="many_contrasts" type="boolean" truevalue="1" falsevalue="0" checked="false"\n             label="Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)"\n             help=" DESeq2 performs independent \xef\xac\x81ltering by default using the mean of normalized counts as a \xef\xac\x81lter statistic" />\n+        <param name="esf" type="select" label="(Optional) Method for estimateSizeFactors" \n+            help="Method for estimation: either \'ratio\', \'poscounts\', or \'iterate\'. \'ratio\' uses the standard median ratio method introduced in DESeq. \n+                The size factor is the median ratio of the sample over a \'pseudosample\': for each gene, the geometric mean of all samples. \n+                \'poscounts\' and \'iterate\' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem \n+                for the default method, as the geometric mean becomes zero, and the ratio undefined). The \'poscounts\' estimator deals with a gene with \n+                some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of \n+                use cases with Paul McMurdie\'s phyloseq package for metagenomic samples. The \'iterate\' estimator iterates between estimating the dispersion \n+                with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model.">\n+            <option value="" selected="true">No Selection (use default)</option>\n+            <option value="ratio">ratio</option>\n+            <option value="poscounts">poscounts</option>\n+            <option value="iterate">iterate</option>\n+        </param>\n         <param name="fit_type" type="select" label="Fit type">\n             <option value="1" selected="true">parametric</option>\n             <option value="2">local</option>\n@@ -180,10 +206,16 @@\n         <data format="tabular" name="counts_out" label="Normalized counts file on ${on_string}">\n             <filter>normCounts == True</filter>\n         </data>\n+        <data format="tabular" name="rlog_out" label="rLog-Normalized counts file on ${on_string}">\n+            <filter>normRLog == True</filter>\n+        </data>\n+        <data format="tabular" name="vst_out" label="VST-Normalized counts file on ${on_string}">\n+            <filter>normVST == True</filter>\n+        </data>\n     </outputs>\n     <tests>\n         <!--Ensure counts files with header works '..b'le.counts\\tGSM461179_treat_single.counts\\tGSM461180_treat_paired.counts\\tGSM461181_treat_paired.counts" />\n                     <has_text_matching expression="FBgn0000003\\t0\\t0\\t0\\t0\\t0\\t0\\t0" />\n                 </assert_contents>\n             </output>\n+            <output name="rlog_out">\n+                <assert_contents>\n+                    <has_text_matching expression="GSM461176_untreat_single.counts\\tGSM461177_untreat_paired.counts\\tGSM461178_untreat_paired.counts\\tGSM461182_untreat_single.counts\\tGSM461179_treat_single.counts\\tGSM461180_treat_paired.counts\\tGSM461181_treat_paired.counts" />\n+                    <has_text_matching expression="FBgn0000003\\t0\\t0\\t0\\t0\\t0\\t0\\t0" />\n+                </assert_contents>\n+            </output>\n+            <output name="vst_out">\n+                <assert_contents>\n+                    <has_text_matching expression="GSM461176_untreat_single.counts\\tGSM461177_untreat_paired.counts\\tGSM461178_untreat_paired.counts\\tGSM461182_untreat_single.counts\\tGSM461179_treat_single.counts\\tGSM461180_treat_paired.counts\\tGSM461181_treat_paired.counts" />\n+                    <has_text_matching expression="FBgn0000003\\t5.*\\t5.*\\t5.*\\t5.*\\t5.*\\t5.*\\t5.*" />\n+                </assert_contents>\n+            </output>\n             <output name="deseq_out" >\n                 <assert_contents>\n                     <has_text_matching expression="FBgn0003360\\t1933.9504.*\\t-2.8399.*\\t0.1309.*-21.6851.*2.831.*8.024" />\n@@ -232,7 +278,7 @@\n             </output>\n         </test>\n         <!--Ensure counts files without header works -->\n-        <test expect_num_outputs="2">\n+        <test expect_num_outputs="4">\n             <repeat name="rep_factorName">\n                 <param name="factorName" value="Treatment"/>\n                 <repeat name="rep_factorLevel">\n@@ -247,12 +293,26 @@\n             <param name="header" value="False"/>\n             <param name="pdf" value="False"/>\n             <param name="normCounts" value="True"/>\n+            <param name="normRLog" value="True"/>\n+            <param name="normVST" value="True"/>\n             <output name="counts_out">\n                 <assert_contents>\n                     <has_text_matching expression="GSM461176_untreat_single.counts.noheader\\tGSM461177_untreat_paired.counts.noheader\\tGSM461178_untreat_paired.counts.noheader\\tGSM461182_untreat_single.counts.noheader\\tGSM461179_treat_single.counts.noheader\\tGSM461180_treat_paired.counts.noheader\\tGSM461181_treat_paired.counts.noheader" />\n                     <has_text_matching expression="FBgn0000003\\t0\\t0\\t0\\t0\\t0\\t0\\t0" />\n                 </assert_contents>\n             </output>\n+            <output name="rlog_out">\n+                <assert_contents>\n+                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\\tGSM461177_untreat_paired.counts.noheader\\tGSM461178_untreat_paired.counts.noheader\\tGSM461182_untreat_single.counts.noheader\\tGSM461179_treat_single.counts.noheader\\tGSM461180_treat_paired.counts.noheader\\tGSM461181_treat_paired.counts.noheader" />\n+                    <has_text_matching expression="FBgn0000003\\t0\\t0\\t0\\t0\\t0\\t0\\t0" />\n+                </assert_contents>\n+            </output>\n+            <output name="vst_out">\n+                <assert_contents>\n+                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\\tGSM461177_untreat_paired.counts.noheader\\tGSM461178_untreat_paired.counts.noheader\\tGSM461182_untreat_single.counts.noheader\\tGSM461179_treat_single.counts.noheader\\tGSM461180_treat_paired.counts.noheader\\tGSM461181_treat_paired.counts.noheader" />\n+                    <has_text_matching expression="FBgn0000003\\t5.*\\t5.*\\t5.*\\t5.*\\t5.*\\t5.*\\t5.*" />\n+                </assert_contents>\n+            </output>\n             <output name="deseq_out" >\n                 <assert_contents>\n                     <has_text_matching expression="FBgn0003360\\t1933.9504.*\\t-2.8399.*\\t0.1309.*-21.6851.*2.831.*8.024" />\n'