changeset 29:cd9874cb9019 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit cbeb1c4c436be04323bd9a809a6393d00b168d07"
author iuc
date Mon, 29 Nov 2021 18:16:48 +0000
parents 7ff33c2d525b
children 8fe98f7094de
files deseq2.R deseq2.xml deseq2_macros.xml
diffstat 3 files changed, 96 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/deseq2.R	Fri Nov 19 21:03:55 2021 +0000
+++ b/deseq2.R	Mon Nov 29 18:16:48 2021 +0000
@@ -52,6 +52,7 @@
   "batch_factors", "w", 1, "character",
   "outfile", "o", 1, "character",
   "countsfile", "n", 1, "character",
+  "sizefactorsfile", "F", 1, "character",
   "rlogfile", "r", 1, "character",
   "vstfile", "v", 1, "character",
   "header", "H", 0, "logical",
@@ -217,6 +218,30 @@
 if (!is.null(opt$esf)) {
     dds <- estimateSizeFactors(dds, type = opt$esf)
 }
+
+# If a size factors file was requested, estimate per-sample size factors and write them (row names, no header)
+# - https://support.bioconductor.org/p/97676/
+if (!is.null(opt$sizefactorsfile)) {
+    nm <- assays(dds)[["avgTxLength"]]
+    if (!is.null(nm)) {
+        ## Recommended: an "avgTxLength" assay is present, so divide the counts by it before estimating
+        cat("\nsize factors for samples: taking tximport data into account\n")
+        size_factors <- estimateSizeFactorsForMatrix(counts(dds) / nm)
+    } else {
+        norm_factors <- normalizationFactors(dds)
+        if (!is.null(norm_factors)) {
+            ## No "avgTxLength" assay: estimate from the normalization factors stored on dds
+            cat("\nsize factors for samples: no tximport data, using derived normalization factors\n")
+            size_factors <- estimateSizeFactorsForMatrix(norm_factors)
+        } else {
+            ## If we have no other information, estimate from the raw counts
+            cat("\nsize factors for samples: no tximport data, no normalization factors, estimating from raw data\n")
+            size_factors <- estimateSizeFactorsForMatrix(counts(dds))
+        }
+    }
+    write.table(size_factors, file = opt$sizefactorsfile, sep = "\t", col.names = FALSE, quote = FALSE)
+}
+
 apply_batch_factors <- function(dds, batch_factors) {
   rownames(batch_factors) <- batch_factors$identifier
   batch_factors <- subset(batch_factors, select = -c(identifier, condition))
--- a/deseq2.xml	Fri Nov 19 21:03:55 2021 +0000
+++ b/deseq2.xml	Mon Nov 29 18:16:48 2021 +0000
@@ -47,6 +47,9 @@
     #if 'normCounts' in $output_options.output_selector:
         -n '$counts_out'
     #end if
+    #if 'sizefactors' in $output_options.output_selector:
+        -F '$sizefactors_out'
+    #end if
     #if 'normRLog' in $output_options.output_selector:
         -r '$rlog_out'
     #end if
@@ -195,6 +198,7 @@
         <section name="output_options" title="Output options">
             <param name="output_selector" type="select" multiple="True" optional="true" display="checkboxes" label="Output selector">
                 <option value="pdf" selected="True">Generate plots for visualizing the analysis results</option>
+                <option value="sizefactors" >Output sample size factors</option>
                 <option value="normCounts">Output normalised counts</option>
                 <option value="normVST">Output VST normalized table</option>
                 <option value="normRLog">Output rLog normalized table</option>
@@ -217,6 +221,9 @@
         <data name="plots" format="pdf" label="DESeq2 plots on ${on_string}">
             <filter>output_options['output_selector'] and 'pdf' in output_options['output_selector']</filter>
         </data>
+        <data name="sizefactors_out" format="tabular" label="Size Factors on ${on_string}">
+            <filter>output_options['output_selector'] and 'sizefactors' in output_options['output_selector']</filter>
+        </data>
         <data name="counts_out" format="tabular" label="Normalized counts file on ${on_string}">
             <filter>output_options['output_selector'] and 'normCounts' in output_options['output_selector']</filter>
         </data>
@@ -513,6 +520,65 @@
                 </assert_contents>
             </output>
         </test>
+        <!-- Same as the alpha_ma test above, but requesting the sample size factors output -->
+        <test expect_num_outputs="2">
+            <repeat name="rep_factorName">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Treated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
+                </repeat>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
+                </repeat>
+            </repeat>
+            <section name="output_options">
+                <param name="output_selector" value="sizefactors"/>
+                <param name="alpha_ma" value="0.05"/>
+            </section>
+            <param name="tximport_selector" value="tximport"/>
+            <param name="txtype" value="sailfish"/>
+            <param name="mapping_format_selector" value="gtf"/>
+            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
+            <output name="sizefactors_out">
+                <assert_contents>
+                    <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+" />
+                    <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Same as the size factors test above, but with a non-default size factor estimator (esf = poscounts) -->
+        <test expect_num_outputs="2">
+            <repeat name="rep_factorName">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Treated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
+                </repeat>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
+                </repeat>
+            </repeat>
+            <section name="advanced_options">
+                <param name="esf" value="poscounts" />
+            </section>
+            <section name="output_options">
+                <param name="output_selector" value="sizefactors"/>
+                <param name="alpha_ma" value="0.05"/>
+            </section>
+            <param name="tximport_selector" value="tximport"/>
+            <param name="txtype" value="sailfish"/>
+            <param name="mapping_format_selector" value="gtf"/>
+            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
+            <output name="sizefactors_out" >
+                <assert_contents>
+                    <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+" />
+                    <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 .. class:: infomark
@@ -608,6 +674,10 @@
        which controls false discovery rate (FDR)
 ====== ==========================================================
 
+By selecting ``Output sample size factors`` in the "Output options"
+selection box, the size factors used to normalize the samples can also
+be output as a tabular file.
+
 .. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
 .. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html
     ]]></help>
--- a/deseq2_macros.xml	Fri Nov 19 21:03:55 2021 +0000
+++ b/deseq2_macros.xml	Mon Nov 29 18:16:48 2021 +0000
@@ -33,7 +33,7 @@
         </requirements>
     </xml>
     <token name="@TOOL_VERSION@">2.11.40.7</token>
-    <token name="@SUFFIX_VERSION@">0</token>
+    <token name="@SUFFIX_VERSION@">1</token>
     <xml name="edam_ontology">
         <edam_topics>                                                                                  
             <edam_topic>topic_3308</edam_topic>