changeset 27:d027d1f4984e draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit c8e7020d39d581d7374a13cf94c64998a6481e05"
author iuc
date Mon, 28 Jun 2021 05:14:25 +0000
parents 6a3a025714d3
children 7ff33c2d525b
files deseq2.R deseq2.xml deseq2_macros.xml
diffstat 3 files changed, 153 insertions(+), 93 deletions(-) [+]
line wrap: on
line diff
--- a/deseq2.R	Fri Jan 08 20:29:54 2021 +0000
+++ b/deseq2.R	Mon Jun 28 05:14:25 2021 +0000
@@ -67,7 +67,8 @@
   "outlier_replace_off", "a", 0, "logical",
   "outlier_filter_off", "b", 0, "logical",
   "auto_mean_filter_off", "c", 0, "logical",
-  "beta_prior_off", "d", 0, "logical"
+  "beta_prior_off", "d", 0, "logical",
+  "alpha_ma", "A", 1, "numeric"
 ), byrow = TRUE, ncol = 4)
 opt <- getopt(spec)
 
@@ -200,7 +201,7 @@
     text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0, 1)), adj = c(0.5, 1.7), xpd = NA)
     legend("topright", fill = rev(colori), legend = rev(names(colori)), bg = "white")
   }
-    plotMA(res, main = paste("MA-plot for", title_suffix), ylim = range(res$log2FoldChange, na.rm = TRUE))
+    plotMA(res, main = paste("MA-plot for", title_suffix), ylim = range(res$log2FoldChange, na.rm = TRUE), alpha = opt$alpha_ma)
 }
 
 if (verbose) {
--- a/deseq2.xml	Fri Jan 08 20:29:54 2021 +0000
+++ b/deseq2.xml	Mon Jun 28 05:14:25 2021 +0000
@@ -1,20 +1,11 @@
-<tool id="deseq2" name="DESeq2" version="2.11.40.6+galaxy1">
+<tool id="deseq2" name="DESeq2" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@">
     <description>Determines differentially expressed features from count tables</description>
     <macros>
         <import>deseq2_macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="1.22.1">bioconductor-deseq2</requirement>
-        <!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 -->
-        <requirement type="package" version="2.26.2">bioconductor-rhdf5</requirement>
-        <requirement type="package" version="1.10.0">bioconductor-tximport</requirement>
-        <requirement type="package" version="1.34.1">bioconductor-genomicfeatures</requirement>
-        <requirement type="package" version="1.20.2">r-getopt</requirement>
-        <requirement type="package" version="0.8.0">r-ggrepel</requirement>
-        <requirement type="package" version="3.0.1">r-gplots</requirement>
-        <requirement type="package" version="1.0.10">r-pheatmap</requirement>
-        <requirement type="package" version="0.2.20">r-rjson</requirement>
-    </requirements>
+    <expand macro='requirements'/>
+    <expand macro='edam_ontology' />
+    <expand macro='xrefs'/>
     <stdio>
         <regex match="Execution halted"
            source="both"
@@ -49,16 +40,17 @@
 Rscript '${__tool_directory__}/deseq2.R'
     --cores \${GALAXY_SLOTS:-1}
     -o '$deseq_out'
-    #if $pdf:
+    #if 'pdf' in $output_options.output_selector:
         -p '$plots'
     #end if
-    #if $normCounts:
+    -A $output_options.alpha_ma
+    #if 'normCounts' in $output_options.output_selector:
         -n '$counts_out'
     #end if
-    #if $normRLog:
+    #if 'normRLog' in $output_options.output_selector:
         -r '$rlog_out'
     #end if
-    #if $normVST:
+    #if 'normVST' in $output_options.output_selector:
         -v '$vst_out'
     #end if
     #set $filename_to_element_identifiers = {}
@@ -90,23 +82,23 @@
 
     -f '#echo json.dumps(temp_factor_names)#'
     -l '#echo json.dumps(filename_to_element_identifiers)#'
-    #if $esf:
-        -e $esf
+    #if $advanced_options.esf:
+        -e $advanced_options.esf
     #end if
-    -t $fit_type
+    -t $advanced_options.fit_type
     #if $batch_factors:
         --batch_factors '$batch_factors'
     #end if
-    #if $outlier_replace_off:
+    #if $advanced_options.outlier_replace_off:
         -a
     #end if
-    #if $outlier_filter_off:
+    #if $advanced_options.outlier_filter_off:
         -b
     #end if
-    #if $auto_mean_filter_off:
+    #if $advanced_options.auto_mean_filter_off:
         -c
     #end if
-    #if $many_contrasts:
+    #if 'many_contrasts' in $output_options.output_selector
         -m
     #end if
     #if $tximport.tximport_selector == 'tximport':
@@ -168,71 +160,71 @@
             </when>
             <when value="count" />
         </conditional>
-        <param name="pdf" type="boolean" truevalue="1" falsevalue="0" checked="true"
-            label="Visualising the analysis results"
-            help="output an additional PDF files" />
-        <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
-            label="Output normalized counts table" />
-        <param name="normRLog" type="boolean" truevalue="1" falsevalue="0" checked="false"
-            label="Output rLog normalized table" />
-        <param name="normVST" type="boolean" truevalue="1" falsevalue="0" checked="false"
-            label="Output VST normalized table" />
-        <param name="many_contrasts" type="boolean" truevalue="1" falsevalue="0" checked="false"
-            label="Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)"
-            help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
-        <param name="esf" type="select" label="(Optional) Method for estimateSizeFactors" 
-            help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq. 
-                The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples. 
-                'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem 
-                for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with 
-                some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of 
-                use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion 
-                with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model.">
-            <option value="" selected="true">No Selection (use default)</option>
-            <option value="ratio">ratio</option>
-            <option value="poscounts">poscounts</option>
-            <option value="iterate">iterate</option>
-        </param>
-        <param name="fit_type" type="select" label="Fit type">
-            <option value="1" selected="true">parametric</option>
-            <option value="2">local</option>
-            <option value="3">mean</option>
-        </param>
-        <param name="outlier_replace_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
-            label="Turn off outliers replacement (only affects with >6 replicates)"
-            help="When there are more than 6 replicates for a given sample, the DESeq2 will automatically replace
-                counts with large Cook’s distance with the trimmed mean over all samples, scaled up by the size factor
-                or normalization factor for that sample" />
-        <param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
-            label="Turn off outliers filtering (only affects with >2 replicates)"
-            help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically
-                filter genes which contain a Cook’s distance above a cutoff" />
-        <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
-            label="Turn off independent filtering"
-            help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
+        <section name="advanced_options" title="Advanced options">
+            <param name="esf" type="select" label="Method for estimateSizeFactors" 
+                help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq. 
+                    The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples. 
+                    'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem 
+                    for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with 
+                    some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of 
+                    use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion 
+                    with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model.">
+                <option value="" selected="true">No Selection (use default)</option>
+                <option value="ratio">ratio</option>
+                <option value="poscounts">poscounts</option>
+                <option value="iterate">iterate</option>
+            </param>
+            <param name="fit_type" type="select" label="Fit type">
+                <option value="1" selected="true">parametric</option>
+                <option value="2">local</option>
+                <option value="3">mean</option>
+            </param>
+            <param name="outlier_replace_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
+                label="Turn off outliers replacement (only affects with >6 replicates)"
+                help="When there are more than 6 replicates for a given sample, the DESeq2 will automatically replace
+                    counts with large Cook’s distance with the trimmed mean over all samples, scaled up by the size factor
+                    or normalization factor for that sample" />
+            <param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
+                label="Turn off outliers filtering (only affects with >2 replicates)"
+                help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically
+                    filter genes which contain a Cook’s distance above a cutoff" />
+            <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
+                label="Turn off independent filtering"
+                help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
+        </section>
+        <section name="output_options" title="Output options">
+            <param name="output_selector" type="select" multiple="True" display="checkboxes" label="Output selector">
+                <option value="pdf" selected="True">Generate plots for visualizing the analysis results</option>
+                <option value="normCounts">Output normalized counts table</option>
+                <option value="normVST">Output VST normalized table</option>
+                <option value="normRLog">Output rLog normalized table</option>
+                <option value="many_contrasts">Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)</option>
+            </param>
+            <param name="alpha_ma" type="float" min="0" max="0.5" value="0.1" label="Alpha value for MA-plot" help="Default value is 0.1. This option is only meaningful when plots are generated" />
+        </section>
     </inputs>
     <outputs>
         <data name="deseq_out" format="tabular" label="DESeq2 result file on ${on_string}">
-            <filter>many_contrasts is False</filter>
+            <filter>'many_contrasts' not in output_options['output_selector']</filter>
             <actions>
                 <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj" />
             </actions>
         </data>
         <collection name="split_output" type="list" label="DESeq2 result files on ${on_string}">
-            <filter>many_contrasts is True</filter>
+            <filter>output_options['output_selector'] and 'many_contrasts' in output_options['output_selector']</filter>
             <discover_datasets pattern="(?P&lt;designation&gt;.+_vs_.+)" format="tabular" directory="." visible="false"/>
         </collection>
         <data name="plots" format="pdf" label="DESeq2 plots on ${on_string}">
-            <filter>pdf == True</filter>
+            <filter>output_options['output_selector'] and 'pdf' in output_options['output_selector']</filter>
         </data>
         <data name="counts_out" format="tabular" label="Normalized counts file on ${on_string}">
-            <filter>normCounts == True</filter>
+            <filter>output_options['output_selector'] and 'normCounts' in output_options['output_selector']</filter>
         </data>
         <data name="rlog_out" format="tabular" label="rLog-Normalized counts file on ${on_string}">
-            <filter>normRLog == True</filter>
+            <filter>output_options['output_selector'] and 'normRLog' in output_options['output_selector']</filter>
         </data>
         <data name="vst_out" format="tabular" label="VST-Normalized counts file on ${on_string}">
-            <filter>normVST == True</filter>
+            <filter>output_options['output_selector'] and 'normVST' in output_options['output_selector']</filter>
         </data>
     </outputs>
     <tests>
@@ -249,10 +241,9 @@
                     <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
                 </repeat>
             </repeat>
-            <param name="pdf" value="False"/>
-            <param name="normCounts" value="True"/>
-            <param name="normRLog" value="True"/>
-            <param name="normVST" value="True"/>
+            <section name="output_options">
+                <param name="output_selector" value="normCounts,normRLog,normVST"/>
+            </section>
             <output name="counts_out">
                 <assert_contents>
                     <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" />
@@ -291,8 +282,9 @@
                 </repeat>
             </repeat>
             <param name="batch_factors" value="batch_factors.tab"/>
-            <param name="pdf" value="False"/>
-            <param name="normCounts" value="True"/>
+            <section name="output_options">
+                <param name="output_selector" value="normCounts"/>
+            </section>
             <output name="deseq_out">
                 <assert_contents>
                     <has_text_matching expression="FBgn0003360\t1933.*\t-2.9.*\t0.1.*\t-26.*\t1.*-152\t4.*-149" />
@@ -313,10 +305,9 @@
                 </repeat>
             </repeat>
             <param name="header" value="False"/>
-            <param name="pdf" value="False"/>
-            <param name="normCounts" value="True"/>
-            <param name="normRLog" value="True"/>
-            <param name="normVST" value="True"/>
+            <section name="output_options">
+                <param name="output_selector" value="normCounts,normRLog,normVST"/>
+            </section>        
             <output name="counts_out">
                 <assert_contents>
                     <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
@@ -354,7 +345,9 @@
                     <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
                 </repeat>
             </repeat>
-            <param name="pdf" value="False"/>
+            <section name="output_options">
+                <param name="output_selector" value=""/>
+            </section>            
             <param name="tximport_selector" value="tximport"/>
             <param name="txtype" value="sailfish"/>
             <param name="mapping_format_selector" value="tabular"/>
@@ -378,7 +371,9 @@
                     <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
                 </repeat>
             </repeat>
-            <param name="pdf" value="False"/>
+            <section name="output_options">
+                <param name="output_selector" value=""/>
+            </section>            
             <param name="tximport_selector" value="tximport"/>
             <param name="txtype" value="sailfish"/>
             <param name="mapping_format_selector" value="gtf"/>
@@ -402,7 +397,9 @@
                     <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
                 </repeat>
             </repeat>
-            <param name="pdf" value="False"/>
+            <section name="output_options">
+                <param name="output_selector" value=""/>
+            </section>            
             <param name="tximport_selector" value="tximport"/>
             <param name="txtype" value="sailfish"/>
             <param name="mapping_format_selector" value="gtf"/>
@@ -437,7 +434,9 @@
                     <param name="groups" value="primary:untreated"/>
                 </repeat>
             </repeat>
-            <param name="pdf" value="False"/>
+            <section name="output_options">
+                <param name="output_selector" value=""/>
+            </section>            
             <param name="tximport_selector" value="tximport"/>
             <param name="txtype" value="sailfish"/>
             <param name="mapping_format_selector" value="tabular"/>
@@ -472,12 +471,13 @@
                     <param name="groups" value="primary:untreated"/>
                 </repeat>
             </repeat>
-            <param name="pdf" value="False"/>
+            <section name="output_options">
+                <param name="output_selector" value="many_contrasts"/>
+            </section>            
             <param name="tximport_selector" value="tximport"/>
             <param name="txtype" value="sailfish"/>
             <param name="mapping_format_selector" value="tabular"/>
             <param name="tabular_file" value="tx2gene.tab"/>
-            <param name="many_contrasts" value="true"/>
             <output_collection name="split_output" type="list" count="1">
                 <element name="Treatment_Treated_vs_Untreated">
                     <assert_contents>
@@ -486,6 +486,33 @@
                 </element>
             </output_collection>
         </test>
+        <!--Test alpha_ma option-->
+        <test expect_num_outputs="1">
+            <repeat name="rep_factorName">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Treated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
+                </repeat>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
+                </repeat>
+            </repeat>
+            <section name="output_options">
+                <param name="output_selector" value=""/>
+                <param name="alpha_ma" value="0.05"/>
+            </section>            
+            <param name="tximport_selector" value="tximport"/>
+            <param name="txtype" value="sailfish"/>
+            <param name="mapping_format_selector" value="gtf"/>
+            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
+            <output name="deseq_out" >
+                <assert_contents>
+                    <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 .. class:: infomark
@@ -584,7 +611,5 @@
 .. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
 .. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html
     ]]></help>
-    <citations>
-        <citation type="doi">10.1186/s13059-014-0550-8</citation>
-    </citations>
+    <expand macro="citations" />
 </tool>
--- a/deseq2_macros.xml	Fri Jan 08 20:29:54 2021 +0000
+++ b/deseq2_macros.xml	Mon Jun 28 05:14:25 2021 +0000
@@ -18,4 +18,38 @@
             </repeat>
         </repeat>
     </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.22.1">bioconductor-deseq2</requirement>
+            <!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 -->
+            <requirement type="package" version="2.26.2">bioconductor-rhdf5</requirement>
+            <requirement type="package" version="1.10.0">bioconductor-tximport</requirement>
+            <requirement type="package" version="1.34.1">bioconductor-genomicfeatures</requirement>
+            <requirement type="package" version="1.20.2">r-getopt</requirement>
+            <requirement type="package" version="0.8.0">r-ggrepel</requirement>
+            <requirement type="package" version="3.0.1">r-gplots</requirement>
+            <requirement type="package" version="1.0.10">r-pheatmap</requirement>
+            <requirement type="package" version="0.2.20">r-rjson</requirement>
+        </requirements>
+    </xml>
+    <token name="@TOOL_VERSION@">2.11.40.6</token>
+    <token name="@SUFFIX_VERSION@">2</token>
+    <xml name="edam_ontology">
+        <edam_topics>                                                                                  
+            <edam_topic>topic_3308</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_3800</edam_operation>
+        </edam_operations>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1186/s13059-014-0550-8</citation>
+        </citations>
+    </xml>
+    <xml name="xrefs">
+        <xrefs>
+          <xref type='bio.tools'>DESeq2</xref>
+        </xrefs>
+      </xml>
 </macros>
\ No newline at end of file