Mercurial > repos > iuc > deseq2

--- a/deseq2.R	Wed Sep 05 15:54:03 2018 -0400
+++ b/deseq2.R	Fri Nov 16 14:47:19 2018 -0500
@@ -49,6 +49,8 @@
   "batch_factors", "", 1, "character",
   "outfile", "o", 1, "character",
   "countsfile", "n", 1, "character",
+  "rlogfile", "r", 1, "character",
+  "vstfile", "v", 1, "character",
   "header", "H", 0, "logical",
   "factors", "f", 1, "character",
   "files_to_labels", "l", 1, "character",
@@ -56,6 +58,7 @@
   "tximport", "i", 0, "logical",
   "txtype", "y", 1, "character",
   "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
+  "esf", "e", 1, "character",
   "fit_type", "t", 1, "integer",
   "many_contrasts", "m", 0, "logical",
   "outlier_replace_off" , "a", 0, "logical",
@@ -188,7 +191,10 @@
 }

 dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula=designFormula, tximport=opt$tximport, txtype=opt$txtype, tx2gene=opt$tx2gene)
-
+# Optional: when --esf/-e names an estimation method, compute size factors with it right after dds is built
+if (!is.null(opt$esf)) {
+    dds <- estimateSizeFactors(dds, type = opt$esf)
+}
 apply_batch_factors <- function (dds, batch_factors) {
   rownames(batch_factors) <- batch_factors$identifier
   batch_factors <- subset(batch_factors, select = -c(identifier, condition))
@@ -284,6 +290,19 @@
     write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE)
 }

+if (!is.null(opt$rlogfile)) {
+    # only when --rlogfile/-r is given: rlog-transform dds, extract the assay matrix, save it tab-separated
+    rLogNormalizedMat <- assay(rlogTransformation(dds))
+    write.table(rLogNormalizedMat, file = opt$rlogfile, sep = "\t", col.names = NA, quote = FALSE)
+}
+
+if (!is.null(opt$vstfile)) {
+    # only when --vstfile/-v is given: variance-stabilize dds, extract the assay matrix, save it tab-separated
+    vstNormalizedMat <- assay(varianceStabilizingTransformation(dds))
+    write.table(vstNormalizedMat, file = opt$vstfile, sep = "\t", col.names = NA, quote = FALSE)
+}
+
+
 if (is.null(opt$many_contrasts)) {
   # only contrast the first and second level of the primary factor
   ref <- allLevels[1]
--- a/deseq2.xml	Wed Sep 05 15:54:03 2018 -0400
+++ b/deseq2.xml	Fri Nov 16 14:47:19 2018 -0500
@@ -1,4 +1,4 @@
-<tool id="deseq2" name="DESeq2" version="2.11.40.2">
+<tool id="deseq2" name="DESeq2" version="2.11.40.3">
     <description>Determines differentially expressed features from count tables</description>
     <requirements>
         <requirement type="package" version="1.18.1">bioconductor-deseq2</requirement>
@@ -43,6 +43,12 @@
     #if $normCounts:
         -n '$counts_out'
     #end if
+    #if $normRLog:
+        -r '$rlog_out'
+    #end if
+    #if $normVST:
+        -v '$vst_out'
+    #end if
     #set $filename_to_element_identifiers = {}
     #set $temp_factor_names = list()
     #for $factor in $rep_factorName:
@@ -63,8 +69,11 @@

     -f '#echo json.dumps(temp_factor_names)#'
     -l '#echo json.dumps(filename_to_element_identifiers)#'
+    #if $esf:
+        -e $esf
+    #end if
     -t $fit_type
-    #if $batch_factors
+    #if $batch_factors:
         --batch_factors '$batch_factors'
     #end if
     #if $outlier_replace_off:
@@ -142,9 +151,26 @@
             help="output an additional PDF files" />
         <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
             label="Output normalized counts table" />
+        <param name="normRLog" type="boolean" truevalue="1" falsevalue="0" checked="false"
+            label="Output rLog normalized table" />
+        <param name="normVST" type="boolean" truevalue="1" falsevalue="0" checked="false"
+            label="Output VST normalized table" />
         <param name="many_contrasts" type="boolean" truevalue="1" falsevalue="0" checked="false"
             label="Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)"
             help=" DESeq2 performs independent ﬁltering by default using the mean of normalized counts as a ﬁlter statistic" />
+        <param name="esf" type="select" label="(Optional) Method for estimateSizeFactors"
+            help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq.
+                The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples.
+                'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem
+                for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with
+                some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of
+                use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion
+                with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model.">
+            <option value="" selected="true">No Selection (use default)</option>
+            <option value="ratio">ratio</option>
+            <option value="poscounts">poscounts</option>
+            <option value="iterate">iterate</option>
+        </param>
         <param name="fit_type" type="select" label="Fit type">
             <option value="1" selected="true">parametric</option>
             <option value="2">local</option>
@@ -180,10 +206,16 @@
         <data format="tabular" name="counts_out" label="Normalized counts file on ${on_string}">
             <filter>normCounts == True</filter>
         </data>
+        <data format="tabular" name="rlog_out" label="rLog-Normalized counts file on ${on_string}">
+            <filter>normRLog == True</filter>
+        </data>
+        <data format="tabular" name="vst_out" label="VST-Normalized counts file on ${on_string}">
+            <filter>normVST == True</filter>
+        </data>
     </outputs>
     <tests>
         <!--Ensure counts files with header works -->
-        <test expect_num_outputs="2">
+        <test expect_num_outputs="4">
             <repeat name="rep_factorName">
                 <param name="factorName" value="Treatment"/>
                 <repeat name="rep_factorLevel">
@@ -197,12 +229,26 @@
             </repeat>
             <param name="pdf" value="False"/>
             <param name="normCounts" value="True"/>
+            <param name="normRLog" value="True"/>
+            <param name="normVST" value="True"/>
             <output name="counts_out">
                 <assert_contents>
                     <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" />
                     <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
                 </assert_contents>
             </output>
+            <output name="rlog_out">
+                <assert_contents>
+                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" />
+                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
+                </assert_contents>
+            </output>
+            <output name="vst_out">
+                <assert_contents>
+                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" />
+                    <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*" />
+                </assert_contents>
+            </output>
             <output name="deseq_out" >
                 <assert_contents>
                     <has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" />
@@ -232,7 +278,7 @@
             </output>
         </test>
         <!--Ensure counts files without header works -->
-        <test expect_num_outputs="2">
+        <test expect_num_outputs="4">
             <repeat name="rep_factorName">
                 <param name="factorName" value="Treatment"/>
                 <repeat name="rep_factorLevel">
@@ -247,12 +293,26 @@
             <param name="header" value="False"/>
             <param name="pdf" value="False"/>
             <param name="normCounts" value="True"/>
+            <param name="normRLog" value="True"/>
+            <param name="normVST" value="True"/>
             <output name="counts_out">
                 <assert_contents>
                     <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
                     <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
                 </assert_contents>
             </output>
+            <output name="rlog_out">
+                <assert_contents>
+                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
+                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
+                </assert_contents>
+            </output>
+            <output name="vst_out">
+                <assert_contents>
+                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
+                    <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*" />
+                </assert_contents>
+            </output>
             <output name="deseq_out" >
                 <assert_contents>
                     <has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" />