changeset 31:9a882d108833 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 469558ddf5bc6249874fe5826637fd6ee81588cf
author iuc
date Tue, 18 Jul 2023 14:58:52 +0000
parents 8fe98f7094de
children
files deseq2.R deseq2.xml deseq2_macros.xml macros.xml test-data/size_factors_out.tsv
diffstat 5 files changed, 260 insertions(+), 135 deletions(-) [+]
line wrap: on
line diff
--- a/deseq2.R	Fri Aug 26 11:16:15 2022 +0000
+++ b/deseq2.R	Tue Jul 18 14:58:52 2023 +0000
@@ -36,9 +36,6 @@
   q("no", 1, FALSE)
 })
 
-# we need that to not crash galaxy with an UTF8 error on German LC settings.
-loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
-
 library("getopt")
 library("tools")
 options(stringAsFactors = FALSE, useFancyQuotes = FALSE)
@@ -69,7 +66,7 @@
   "outlier_replace_off", "a", 0, "logical",
   "outlier_filter_off", "b", 0, "logical",
   "auto_mean_filter_off", "c", 0, "logical",
-  "beta_prior_off", "d", 0, "logical",
+  "use_beta_priors", "d", 0, "logical",
   "alpha_ma", "A", 1, "numeric",
   "prefilter", "P", 0, "logical",
   "prefilter_value", "V", 1, "numeric"
@@ -217,11 +214,27 @@
 }
 
 dds <- get_deseq_dataset(sample_table, header = opt$header, design_formula = design_formula, tximport = opt$tximport, txtype = opt$txtype, tx2gene = opt$tx2gene)
-# estimate size factors for the chosen method
+
+# Size factors: estimate with a built-in method ("ratio", "poscounts", "iterate"), or treat opt$esf as the path of a user-supplied table.
 if (!is.null(opt$esf)) {
-    dds <- estimateSizeFactors(dds, type = opt$esf)
+    if (opt$esf %in% c("ratio", "poscounts", "iterate")) {
+        cat("Calculating size factors de novo\n")
+        dds <- estimateSizeFactors(dds, type = opt$esf)
+    } else {
+        sf_table <- read.table(opt$esf)
+        # Look up each sample by name (col 1 -> col 2) so the values follow sample_table row order; merge(sort = FALSE) leaves row order unspecified.
+        sf_index <- match(rownames(sample_table), sf_table[[1]])
+        if (anyNA(sf_index)) stop("Size factor file lacks an entry for some samples", call. = FALSE)
+        sf_values <- as.numeric(as.character(sf_table[[2]][sf_index]))
+        "sizeFactors"(dds) <- sf_values
+        cat("Using user-provided size factors:\n")
+        print(sf_values)
+    }
+} else {
+    cat("No size factor was used\n")
 }
 
+
 # estimate size factors for each sample
 # - https://support.bioconductor.org/p/97676/
 if (!is.null(opt$sizefactorsfile)) {
@@ -312,12 +325,15 @@
 }
 
 # shrinkage of LFCs
-if (is.null(opt$beta_prior_off)) {
-  beta_prior <- TRUE
+if (is.null(opt$use_beta_priors)) {
+  beta_prior <- FALSE  # flag not given on the command line: the beta prior stays off
+  if (verbose) {
+    cat("Applied default - beta prior off\n")
+  }
 } else {
-  beta_prior <- FALSE
-  if (verbose) cat("beta prior off\n")
+  beta_prior <- opt$use_beta_priors
 }
+cat(sprintf("use_beta_prior is set to %s\n", beta_prior))  # explicit cat(): a bare sprintf() is only shown via top-level autoprint
 
 # dispersion fit type
 if (is.null(opt$fit_type)) {
--- a/deseq2.xml	Fri Aug 26 11:16:15 2022 +0000
+++ b/deseq2.xml	Tue Jul 18 14:58:52 2023 +0000
@@ -1,24 +1,24 @@
-<tool id="deseq2" name="DESeq2" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@">
+<tool id="deseq2" name="DESeq2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>Determines differentially expressed features from count tables</description>
     <macros>
-        <import>deseq2_macros.xml</import>
+        <import>macros.xml</import>
     </macros>
+    <expand macro='edam_ontology'/>
+    <expand macro='xrefs'/>
     <expand macro='requirements'/>
-    <expand macro='edam_ontology' />
-    <expand macro='xrefs'/>
     <stdio>
         <regex match="Execution halted"
            source="both"
            level="fatal"
-           description="Execution halted." />
+           description="Execution halted."/>
         <regex match="Error in"
            source="both"
            level="fatal"
-           description="An undefined error occurred, please check your input carefully and contact your administrator." />
+           description="An undefined error occurred, please check your input carefully and contact your administrator."/>
         <regex match="Fatal error"
            source="both"
            level="fatal"
-           description="An undefined error occurred, please check your input carefully and contact your administrator." />
+           description="An undefined error occurred, please check your input carefully and contact your administrator."/>
     </stdio>
     <version_command><![CDATA[
 echo $(R --version | grep version | grep -v GNU)", DESeq2 version" $(R --vanilla --slave -e "library(DESeq2); cat(sessionInfo()\$otherPkgs\$DESeq2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
@@ -85,8 +85,12 @@
 
     -f '#echo json.dumps(temp_factor_names)#'
     -l '#echo json.dumps(filename_to_element_identifiers)#'
-    #if $advanced_options.esf:
-        -e $advanced_options.esf
+    #if $advanced_options.esf_cond.esf:
+        #if $advanced_options.esf_cond.esf == "user":
+            -e $advanced_options.esf_cond.size_factor_input
+        #else:
+            -e $advanced_options.esf_cond.esf
+        #end if
     #end if
     -t $advanced_options.fit_type
     #if $batch_factors:
@@ -96,15 +100,12 @@
         $advanced_options.prefilter_conditional.prefilter
         -V $advanced_options.prefilter_conditional.prefilter_value
     #end if
-    #if $advanced_options.outlier_replace_off:
-        -a
-    #end if
-    #if $advanced_options.outlier_filter_off:
-        -b
-    #end if
-    #if $advanced_options.auto_mean_filter_off:
-        -c
-    #end if
+    
+    $advanced_options.outlier_replace_off
+    $advanced_options.outlier_filter_off
+    $advanced_options.auto_mean_filter_off
+    $advanced_options.use_beta_priors
+
     #if 'many_contrasts' in $output_options.output_selector
         -m
     #end if
@@ -116,7 +117,6 @@
         #else:
             -x mapping.txt
         #end if
-
     #end if
 ]]></command>
     <inputs>
@@ -139,7 +139,7 @@
         </conditional>
 
         <param name="batch_factors" type="data" format="tabular" optional="true" label="(Optional) provide a tabular file with additional batch factors to include in the model." help="You can produce this file using RUVSeq or svaseq."/>
-        <param name="header" type="boolean" truevalue="-H" falsevalue="" checked="true" label="Files have header?" help="If this option is set to Yes, the tool will assume that the count files have column headers in the first row. Default: Yes" />
+        <param name="header" type="boolean" truevalue="-H" falsevalue="" checked="true" label="Files have header?" help="If this option is set to Yes, the tool will assume that the count files have column headers in the first row. Default: Yes"/>
 
         <conditional name="tximport">
             <param name="tximport_selector" type="select" label="Choice of Input data">
@@ -165,39 +165,49 @@
                     </when>
                 </conditional>
             </when>
-            <when value="count" />
+            <when value="count"/>
         </conditional>
         <section name="advanced_options" title="Advanced options">
-            <param name="esf" type="select" label="Method for estimateSizeFactors" 
-                help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq. 
-                    The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples. 
-                    'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem 
-                    for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with 
-                    some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of 
-                    use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion 
-                    with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model.">
-                <option value="" selected="true">No Selection (use default)</option>
-                <option value="ratio">ratio</option>
-                <option value="poscounts">poscounts</option>
-                <option value="iterate">iterate</option>
-            </param>
+            <conditional name="esf_cond">
+                <param name="esf" type="select" label="Method for estimateSizeFactors"
+                    help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq.
+                        The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples.
+                        'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem
+                        for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with
+                        some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of
+                        use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion
+                        with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model.">
+                    <option value="" selected="true">No Selection (use default)</option>
+                    <option value="ratio">ratio</option>
+                    <option value="poscounts">poscounts</option>
+                    <option value="iterate">iterate</option>
+                    <option value="user">User-provided</option>
+                </param>
+                <when value=""/>
+                <when value="ratio"/>
+                <when value="poscounts"/>
+                <when value="iterate"/>
+                <when value="user">
+                    <param name="size_factor_input" type="data" format="tabular" label="File with custom size factors" help="The input must be a 2-column file: col1 should have the input file names. Col2 should contain your custom size factors."/>
+                </when>
+            </conditional>
             <param name="fit_type" type="select" label="Fit type">
                 <option value="1" selected="true">parametric</option>
                 <option value="2">local</option>
                 <option value="3">mean</option>
             </param>
-            <param name="outlier_replace_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
+            <param name="outlier_replace_off" type="boolean" truevalue="-a" falsevalue="" checked="false"
                 label="Turn off outliers replacement (only affects with >6 replicates)"
                 help="When there are more than 6 replicates for a given sample, the DESeq2 will automatically replace
                     counts with large Cook’s distance with the trimmed mean over all samples, scaled up by the size factor
-                    or normalization factor for that sample" />
-            <param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
+                    or normalization factor for that sample"/>
+            <param name="outlier_filter_off" type="boolean" truevalue="-b" falsevalue="" checked="false"
                 label="Turn off outliers filtering (only affects with >2 replicates)"
                 help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically
-                    filter genes which contain a Cook’s distance above a cutoff" />
-            <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
+                    filter genes which contain a Cook’s distance above a cutoff"/>
+            <param name="auto_mean_filter_off" type="boolean" truevalue="-c" falsevalue="" checked="false"
                 label="Turn off independent filtering"
-                help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
+                help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic"/>
             <conditional name="prefilter_conditional">
                 <param name="prefilter" type="select" label="Perform pre-filtering" help="While it is not necessary to pre-filter 
                     low count genes before running the DESeq2 functions, there are two reasons which make pre-filtering useful: 
@@ -207,28 +217,31 @@
                     <option value="" selected="true">Disabled</option>
                 </param>
                 <when value="-P">
-                    <param name="prefilter_value" type="integer" min="0" value="10" label="Pre-filter value" help="Keep only rows that have at least N reads total." />
+                    <param name="prefilter_value" type="integer" min="0" value="10" label="Pre-filter value" help="Keep only rows that have at least N reads total."/>
                 </when>
                 <when value=""/>
             </conditional>
+            <param name="use_beta_priors" type="boolean" truevalue="-d" falsevalue="" checked="false"
+                label="Use beta priors"
+                help="Whether or not to put a zero-mean normal prior on the non-intercept coefficients"/>
         </section>
         <section name="output_options" title="Output options">
             <param name="output_selector" type="select" multiple="True" optional="true" display="checkboxes" label="Output selector">
                 <option value="pdf" selected="True">Generate plots for visualizing the analysis results</option>
-                <option value="sizefactors" >Output sample size factors</option>
+                <option value="sizefactors">Output sample size factors</option>
                 <option value="normCounts">Output normalised counts</option>
                 <option value="normVST">Output VST normalized table</option>
                 <option value="normRLog">Output rLog normalized table</option>
                 <option value="many_contrasts">Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)</option>
             </param>
-            <param name="alpha_ma" type="float" min="0" max="0.5" value="0.1" label="Alpha value for MA-plot" help="Default value is 0.1. This option is only meaninful when plots are generated" />
+            <param name="alpha_ma" type="float" min="0" max="0.5" value="0.1" label="Alpha value for MA-plot" help="Default value is 0.1. This option is only meaningful when plots are generated"/>
         </section>
     </inputs>
     <outputs>
         <data name="deseq_out" format="tabular" label="DESeq2 result file on ${on_string}">
             <filter>'many_contrasts' not in output_options['output_selector']</filter>
             <actions>
-                <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj" />
+                <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj"/>
             </actions>
         </data>
         <collection name="split_output" type="list" label="DESeq2 result files on ${on_string}">
@@ -265,30 +278,33 @@
                     <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
                 </repeat>
             </repeat>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="1"/>
+            </section>
             <section name="output_options">
                 <param name="output_selector" value="normCounts,normRLog,normVST"/>
             </section>
             <output name="counts_out">
                 <assert_contents>
-                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" />
-                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
+                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts"/>
+                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0"/>
                 </assert_contents>
             </output>
             <output name="rlog_out">
                 <assert_contents>
-                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" />
-                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
+                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts"/>
+                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0"/>
                 </assert_contents>
             </output>
             <output name="vst_out">
                 <assert_contents>
-                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" />
-                    <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*" />
+                    <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts"/>
+                    <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*"/>
                 </assert_contents>
             </output>
             <output name="deseq_out" >
                 <assert_contents>
-                    <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" />
+                    <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101"/>
                     <has_n_lines n="3999"/>
                 </assert_contents>
             </output>
@@ -307,12 +323,41 @@
                 </repeat>
             </repeat>
             <param name="batch_factors" value="batch_factors.tab"/>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="1"/>
+            </section>
             <section name="output_options">
                 <param name="output_selector" value="normCounts"/>
             </section>
             <output name="deseq_out">
                 <assert_contents>
-                    <has_text_matching expression="FBgn0003360\t1933.*\t-2.9.*\t0.1.*\t-26.*\t1.*-152\t4.*-149" />
+                    <has_text_matching expression="FBgn0003360\t1933.*\t-2.9.*\t0.1.*\t-26.*\t1.*-152\t4.*-149"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Same as above, but without beta priors -->
+        <test expect_num_outputs="2">
+            <repeat name="rep_factorName">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Treated"/>
+                    <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/>
+                </repeat>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
+                </repeat>
+            </repeat>
+            <param name="batch_factors" value="batch_factors.tab"/>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="0"/>
+            </section>
+            <section name="output_options">
+                <param name="output_selector" value="normCounts"/>
+            </section>
+            <output name="deseq_out">
+                <assert_contents>
+                    <has_text_matching expression="FBgn0003360\t1933.*\t-3.*\t0.1.*\t-26.*\t6.*-151\t1.*-147"/>
                 </assert_contents>
             </output>
         </test>
@@ -330,30 +375,33 @@
                 </repeat>
             </repeat>
             <param name="header" value="False"/>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="1"/>
+            </section>
             <section name="output_options">
                 <param name="output_selector" value="normCounts,normRLog,normVST"/>
             </section>        
             <output name="counts_out">
                 <assert_contents>
-                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
-                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
+                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader"/>
+                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0"/>
                 </assert_contents>
             </output>
             <output name="rlog_out">
                 <assert_contents>
-                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
-                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
+                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader"/>
+                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0"/>
                 </assert_contents>
             </output>
             <output name="vst_out">
                 <assert_contents>
-                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
-                    <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*" />
+                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader"/>
+                    <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*"/>
                 </assert_contents>
             </output>
             <output name="deseq_out" >
                 <assert_contents>
-                    <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" />
+                    <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101"/>
                 </assert_contents>
             </output>
         </test>
@@ -370,6 +418,9 @@
                     <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
                 </repeat>
             </repeat>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="1"/>
+            </section>
             <section name="output_options">
                 <param name="output_selector" value=""/>
             </section>            
@@ -379,7 +430,7 @@
             <param name="tabular_file" value="tx2gene.tab"/>
             <output name="deseq_out" >
                 <assert_contents>
-                    <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*" />
+                    <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*"/>
                 </assert_contents>
             </output>
         </test>
@@ -396,6 +447,9 @@
                     <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
                 </repeat>
             </repeat>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="1"/>
+            </section>
             <section name="output_options">
                 <param name="output_selector" value=""/>
             </section>            
@@ -405,7 +459,7 @@
             <param name="gtf_file" value="GRCh38_latest_genomic.gff"/>
             <output name="deseq_out" >
                 <assert_contents>
-                    <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*" />
+                    <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*"/>
                 </assert_contents>
             </output>
         </test>
@@ -422,6 +476,9 @@
                     <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
                 </repeat>
             </repeat>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="1"/>
+            </section>
             <section name="output_options">
                 <param name="output_selector" value=""/>
             </section>            
@@ -431,7 +488,7 @@
             <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
             <output name="deseq_out" >
                 <assert_contents>
-                    <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*" />
+                    <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*"/>
                 </assert_contents>
             </output>
         </test>
@@ -459,6 +516,9 @@
                     <param name="groups" value="primary:untreated"/>
                 </repeat>
             </repeat>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="1"/>
+            </section>
             <section name="output_options">
                 <param name="output_selector" value=""/>
             </section>            
@@ -496,6 +556,9 @@
                     <param name="groups" value="primary:untreated"/>
                 </repeat>
             </repeat>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="1"/>
+            </section>
             <section name="output_options">
                 <param name="output_selector" value="many_contrasts"/>
             </section>            
@@ -524,6 +587,9 @@
                     <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
                 </repeat>
             </repeat>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="1"/>
+            </section>
             <section name="output_options">
                 <param name="output_selector" value=""/>
                 <param name="alpha_ma" value="0.05"/>
@@ -534,7 +600,7 @@
             <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
             <output name="deseq_out" >
                 <assert_contents>
-                    <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*" />
+                    <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*"/>
                 </assert_contents>
             </output>
         </test>
@@ -561,8 +627,42 @@
             <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
             <output name="sizefactors_out">
                 <assert_contents>
-                    <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+" />
-                    <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+" />
+                    <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+"/>
+                    <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!--Test alpha_ma option, but with user-provided size factors -->
+        <test expect_num_outputs="1">
+            <repeat name="rep_factorName">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Treated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
+                </repeat>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
+                </repeat>
+            </repeat>
+            <section name="advanced_options">
+                <param name="use_beta_priors" value="1"/>
+                <conditional name="esf_cond">
+                    <param name="esf" value="user"/>
+                    <param name="size_factor_input" value="size_factors_out.tsv"/>
+                </conditional>
+            </section>
+            <section name="output_options">
+                <param name="output_selector" value=""/>
+                <param name="alpha_ma" value="0.05"/>
+            </section>
+            <param name="tximport_selector" value="tximport"/>
+            <param name="txtype" value="sailfish"/>
+            <param name="mapping_format_selector" value="gtf"/>
+            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
+            <output name="deseq_out">
+                <assert_contents>
+                    <has_text_matching expression="ENSG00000168671\t1.90.*\t-0.05.*\t0.75.*\t-0.07.*\t0.94.*\t0.95.*"/>
                 </assert_contents>
             </output>
         </test>
@@ -580,7 +680,7 @@
                 </repeat>
             </repeat>
             <section name="advanced_options">
-                <param name="esf" value="poscounts" />
+                <param name="esf" value="poscounts"/>
             </section>
             <section name="output_options">
                 <param name="output_selector" value="sizefactors"/>
@@ -592,8 +692,8 @@
             <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
             <output name="sizefactors_out" >
                 <assert_contents>
-                    <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+" />
-                    <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+" />
+                    <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+"/>
+                    <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+"/>
                 </assert_contents>
             </output>
         </test>
@@ -636,7 +736,7 @@
 
 **What it does**
 
-Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution
+Uses DESeq2 version @DESEQ2_VERSION@ to estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution.
 
 -----
 
@@ -732,5 +832,5 @@
 .. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
 .. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html
     ]]></help>
-    <expand macro="citations" />
+    <expand macro="citations"/>
 </tool>
--- a/deseq2_macros.xml	Fri Aug 26 11:16:15 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-<macros>
-    <xml name="factor_repeat">
-        <repeat name="rep_factorName" title="Factor" min="1">
-            <param name="factorName" type="text" value="FactorName" label="Specify a factor name, e.g. effects_drug_x or cancer_markers"
-                help="Only letters, numbers and underscores will be retained in this field">
-                <sanitizer>
-                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
-                </sanitizer>
-            </param>
-            <repeat name="rep_factorLevel" title="Factor level" min="2" default="2">
-                <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"
-                    help="Only letters, numbers and underscores will be retained in this field">
-                    <sanitizer>
-                        <valid initial="string.letters,string.digits"><add value="_" /></valid>
-                    </sanitizer>
-                </param>
-                <yield/>
-            </repeat>
-        </repeat>
-    </xml>
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package" version="1.34.0">bioconductor-deseq2</requirement>
-            <!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 -->
-            <requirement type="package" version="2.38.0">bioconductor-rhdf5</requirement>
-            <requirement type="package" version="1.22.0">bioconductor-tximport</requirement>
-            <requirement type="package" version="1.46.1">bioconductor-genomicfeatures</requirement>
-            <requirement type="package" version="1.20.3">r-getopt</requirement>
-            <requirement type="package" version="0.9.1">r-ggrepel</requirement>
-            <requirement type="package" version="3.1.1">r-gplots</requirement>
-            <requirement type="package" version="1.0.12">r-pheatmap</requirement>
-            <requirement type="package" version="0.2.20">r-rjson</requirement>
-        </requirements>
-    </xml>
-    <token name="@TOOL_VERSION@">2.11.40.7</token>
-    <token name="@SUFFIX_VERSION@">2</token>
-    <xml name="edam_ontology">
-        <edam_topics>                                                                                  
-            <edam_topic>topic_3308</edam_topic>
-        </edam_topics>
-        <edam_operations>
-            <edam_operation>operation_3800</edam_operation>
-        </edam_operations>
-    </xml>
-    <xml name="citations">
-        <citations>
-            <citation type="doi">10.1186/s13059-014-0550-8</citation>
-        </citations>
-    </xml>
-    <xml name="xrefs">
-        <xrefs>
-          <xref type='bio.tools'>DESeq2</xref>
-        </xrefs>
-      </xml>
-</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Jul 18 14:58:52 2023 +0000
@@ -0,0 +1,58 @@
+<macros>
+    <xml name="factor_repeat">
+        <repeat name="rep_factorName" title="Factor" min="1">
+            <param name="factorName" type="text" value="FactorName" label="Specify a factor name, e.g. effects_drug_x or cancer_markers"
+                help="Only letters, numbers and underscores will be retained in this field">
+                <sanitizer>
+                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
+                </sanitizer>
+            </param>
+            <repeat name="rep_factorLevel" title="Factor level" min="2" default="2">
+                <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"
+                    help="Only letters, numbers and underscores will be retained in this field">
+                    <sanitizer>
+                        <valid initial="string.letters,string.digits"><add value="_" /></valid>
+                    </sanitizer>
+                </param>
+                <yield/>
+            </repeat>
+        </repeat>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@DESEQ2_VERSION@">bioconductor-deseq2</requirement>
+            <!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 -->
+            <requirement type="package" version="2.44.0">bioconductor-rhdf5</requirement>
+            <requirement type="package" version="1.28.0">bioconductor-tximport</requirement>
+            <requirement type="package" version="1.52.1">bioconductor-genomicfeatures</requirement>
+            <requirement type="package" version="1.20.3">r-getopt</requirement>
+            <requirement type="package" version="0.9.3">r-ggrepel</requirement>
+            <requirement type="package" version="3.1.3">r-gplots</requirement>
+            <requirement type="package" version="1.0.12">r-pheatmap</requirement>
+            <requirement type="package" version="0.2.21">r-rjson</requirement>
+        </requirements>
+    </xml>
+    <token name="@TOOL_VERSION@">2.11.40.8</token>
+    <token name="@DESEQ2_VERSION@">1.40.2</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">22.01</token>
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_3308</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_3800</edam_operation>
+        </edam_operations>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1186/s13059-014-0550-8</citation>
+        </citations>
+    </xml>
+    <xml name="xrefs">
+        <xrefs><!-- cross-references: bio.tools and Bioconductor entries -->
+            <xref type="bio.tools">DESeq2</xref>
+            <xref type="bioconductor">deseq2</xref>
+        </xrefs>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/size_factors_out.tsv	Tue Jul 18 14:58:52 2023 +0000
@@ -0,0 +1,6 @@
+sailfish_quant.sf4.tab	0.84800690799672
+sailfish_quant.sf5.tab	1.10790786350701
+sailfish_quant.sf6.tab	1.21319523337605
+sailfish_quant.sf1.tab	1.19061589081921
+sailfish_quant.sf2.tab	0.712203801356132
+sailfish_quant.sf3.tab	1.03464248515867