diff limma_voom.xml @ 4:a61a6e62e91f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 6a458881c0819b75e55e64b3f494679d43bb9ee8
author iuc
date Sun, 29 Apr 2018 17:36:42 -0400
parents 38aab66ae5cb
children d8a55b5f0de0
line wrap: on
line diff
--- a/limma_voom.xml	Wed Jan 31 12:45:42 2018 -0500
+++ b/limma_voom.xml	Sun Apr 29 17:36:42 2018 -0400
@@ -1,10 +1,10 @@
-<tool id="limma_voom" name="limma" version="3.34.6.0">
+<tool id="limma_voom" name="limma" version="3.34.9.0">
     <description>
         Perform differential expression with limma-voom or limma-trend
     </description>
 
     <requirements>
-        <requirement type="package" version="3.34.6">bioconductor-limma</requirement>
+        <requirement type="package" version="3.34.9">bioconductor-limma</requirement>
         <requirement type="package" version="3.20.7">bioconductor-edger</requirement>
         <requirement type="package" version="1.4.30">r-statmod</requirement>
         <requirement type="package" version="0.5.0">r-scales</requirement>
@@ -102,6 +102,10 @@
 
 &&
 cp '$outReport.files_path'/*.tsv output_dir/
+
+#if $out.rscript:
+    && cp '$__tool_directory__/limma_voom.R' '$rscript'
+#end if
     ]]></command>
 
     <inputs>
@@ -138,12 +142,12 @@
                     </param>
                     <repeat name="rep_group" title="Group" min="2" default="2">
                         <param name="groupName" type="text" label="Name"
-                        help="Name of group that the counts files(s) belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive).">
+                        help="Name of group that the counts files belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive).">
                         <sanitizer>
                             <valid initial="string.letters,string.digits"><add value="_" /></valid>
                         </sanitizer>
                         </param>
-                        <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/>
+                        <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts files"/>
                     </repeat>
                 </repeat>
             </when>
@@ -245,6 +249,7 @@
                 label="Output Normalised Counts Table?"
                 help="Output a file containing the normalised counts, these are in log2 counts per million (logCPM). Default: No">
             </param>
+            <param name="rscript" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used will be provided as a text file in the output. Default: No"/>
             <param name="rdaOption" type="boolean" truevalue="1" falsevalue="0" checked="false"
                 label="Output RData file?"
                 help="Output all the data used by R to construct the plots and tables, can be loaded into R. A link to the RData file will be provided in the HTML report. Default: No">
@@ -281,6 +286,9 @@
         <collection name="outTables" type="list" label="${tool.name} on ${on_string}: Tables">
             <discover_datasets pattern="(?P&lt;name&gt;.+)\.tsv$" format="tabular" directory="output_dir" visible="false" />
         </collection>
+        <data name="rscript" format="txt" label="${tool.name} on ${on_string}: Rscript">
+            <filter>out['rscript']</filter>
+        </data>
     </outputs>
 
     <tests>
@@ -300,8 +308,18 @@
             </repeat>
             <param name="normalisationOption" value="TMM" />
             <output_collection name="outTables" count="2">
-                <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT.tsv" />
-                <element name="limma-voom_WT-Mut" ftype="tabular" file="limma-voom_WT-Mut.tsv" />
+                <element name="limma-voom_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*0.4573" />
+                    </assert_contents>
+                </element>
+                <element name="limma-voom_WT-Mut" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*-0.4573" />
+                    </assert_contents>
+                </element>
             </output_collection>
             <output name="outReport" >
                 <assert_contents>
@@ -309,7 +327,7 @@
                     <not_has_text text="RData" />
                 </assert_contents>
             </output>
-        </test>
+       </test>
         <!-- Ensure annotation file input works -->
         <test>
             <param name="format" value="matrix" />
@@ -325,12 +343,18 @@
             </repeat>
             <param name="normalisationOption" value="TMM" />
             <output_collection name="outTables" count="1">
-                <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT_anno.tsv" />
+                <element name="limma-voom_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="EntrezID.*Symbol.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*Abca4.*0.4573" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
-        <!-- Ensure RData file can be output -->
+        <!-- Ensure Rscript and RData files can be output -->
         <test>
             <param name="format" value="matrix" />
+            <param name="rscript" value="True"/>
             <param name="rdaOption" value="true" />
             <param name="counts" value="matrix.txt" />
             <repeat name="rep_factor">
@@ -346,6 +370,7 @@
                     <has_text text="RData" />
                 </assert_contents>
             </output>
+            <output name="rscript" value="out_rscript.txt"/>
         </test>
         <!-- Ensure secondary factors work -->
         <test>
@@ -364,7 +389,12 @@
             </repeat>
             <param name="normalisationOption" value="TMM" />
             <output_collection name="outTables" count="1" >
-                <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT_2fact.tsv" />
+                <element name="limma-voom_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*0.4590" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <!-- Ensure factors file input works -->
@@ -378,7 +408,12 @@
             </repeat>
             <param name="normalisationOption" value="TMM" />
             <output_collection name="outTables" count="1">
-                <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT_2fact.tsv" />
+                <element name="limma-voom_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*0.4590" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <!-- Ensure normalised counts file output works-->
@@ -395,8 +430,18 @@
             </repeat>
             <param name="normalisationOption" value="TMM" />
             <output_collection name="outTables" count="2">
-                <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT.tsv" />
-                <element name="limma-voom_normcounts" ftype="tabular" file="limma-voom_normcounts.tsv" />
+                <element name="limma-voom_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*0.4573" />
+                    </assert_contents>
+                </element>
+                <element name="limma-voom_normcounts" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*Mut1.*Mut2.*Mut3.*WT1.*WT2.*WT3" />
+                        <has_text_matching expression="11304.*15.7545" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <!-- Ensure multiple counts files input works -->
@@ -438,9 +483,24 @@
             </repeat>
             <param name="normCounts" value="true" />
             <output_collection name="outTables" count="3">
-                <element name="limma-voom_Mut-WT" ftype="tabular" file="limma-voom_Mut-WT_2fact_anno.tsv" />
-                <element name="limma-voom_WT-Mut" ftype="tabular" file="limma-voom_WT-Mut_2fact_anno.tsv" />
-                <element name="limma-voom_normcounts" ftype="tabular" file="limma-voom_normcounts_anno.tsv" />
+                <element name="limma-voom_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="EntrezID.*Symbol.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*Abca4.*0.4590" />
+                    </assert_contents>
+                </element>
+                <element name="limma-voom_WT-Mut" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="EntrezID.*Symbol.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*Abca4.*-0.4590" />
+                    </assert_contents>
+                </element>
+                <element name="limma-voom_normcounts" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="EntrezID.*Symbol.*Mut1.*Mut2.*Mut3.*WT1.*WT2.*WT3" />
+                        <has_text_matching expression="11304.*Abca4.*15.7545" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <!-- Ensure limma-trend option works -->
@@ -463,7 +523,42 @@
                 </assert_contents>
             </output>
             <output_collection name="outTables" count="1">
-                <element name="limma-trend_Mut-WT" ftype="tabular" file="limma-trend_Mut-WT.tsv" />
+                <element name="limma-trend_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*0.4540" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Ensure limma-trend option with annotation works -->
+        <test>
+            <param name="format" value="matrix" />
+            <param name="counts" value="matrix.txt" />
+            <param name="annoOpt" value="yes" />
+            <param name="geneanno" value="anno.txt" />
+            <repeat name="rep_factor">
+                <param name="factorName" value="Genotype"/>
+                <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT" />
+            </repeat>
+            <repeat name="rep_contrast">
+                <param name="contrast" value="Mut-WT" />
+            </repeat>
+            <param name="normalisationOption" value="TMM" />
+            <param name="de_select" value="trend" />
+            <param name="rdaOption" value="true" />
+            <output name="outReport" >
+                <assert_contents>
+                    <has_text text="The limma-trend method was used" />
+                </assert_contents>
+            </output>
+            <output_collection name="outTables" count="1">
+                <element name="limma-trend_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="EntrezID.*Symbol.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*Abca4.*0.4540" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
     </tests>
@@ -488,7 +583,6 @@
 ratio of the largest library size to the smallest is not more than about 3-fold. When the library sizes are quite variable between samples, then the voom approach is theoretically more powerful than limma-trend. For more information see the excellent `limma User's Guide`_.
 
 **Counts Data:**
-
 The counts data can either be input as separate counts files (one sample per file) or a single count matrix (one sample per column). The rows correspond to genes, and columns correspond to the counts for the samples. Values must be tab separated, with the first row containing the sample/column labels and the first column containing the row/gene labels. Gene identifiers can be of any type but must be unique and not repeated within a counts file.
 
 Example - **Separate Count Files**:
@@ -520,19 +614,19 @@
 **Gene Annotations:**
 Optional input for gene annotations, this can contain more
 information about the genes than just an ID number. The annotations will
-be available in the differential expression results table and the optional normalised counts table.
+be available in the differential expression results table and the optional normalised counts table. The file must contain a header row and have the gene IDs in the first column. The number of rows should match that of the counts files; add NA for any gene IDs with no annotation. The Galaxy tool **annotateMyIDs** can be used to obtain annotations for human, mouse, fly and zebrafish.
 
 Example:
 
     ==========  ==========  ===================================================
     **GeneID**  **Symbol**  **GeneName**
     ----------  ----------  ---------------------------------------------------
-    1287        Pzp         pregnancy zone protein
-    1298        Aanat       arylalkylamine N-acetyltransferase
-    1302        Aatk        apoptosis-associated tyrosine kinase
-    1303        Abca1       ATP-binding cassette, sub-family A (ABC1), member 1
-    1304        Abca4       ATP-binding cassette, sub-family A (ABC1), member 4
-    1305        Abca2       ATP-binding cassette, sub-family A (ABC1), member 2
+    11287       Pzp         pregnancy zone protein
+    11298       Aanat       arylalkylamine N-acetyltransferase
+    11302       Aatk        apoptosis-associated tyrosine kinase
+    11303       Abca1       ATP-binding cassette, sub-family A (ABC1), member 1
+    11304       Abca4       ATP-binding cassette, sub-family A (ABC1), member 4
+    11305       Abca2       ATP-binding cassette, sub-family A (ABC1), member 2
     ==========  ==========  ===================================================
 
 **Factor Information:**
@@ -556,24 +650,6 @@
 *Groups:* The names of the groups for the factor. These must be entered in the same order as the samples (to which the groups correspond) are listed in the columns of the counts matrix. Spaces must not be used and if entered into the tool form above, the values should be separated by commas.
 
 
-**Gene Annotations:**
-Optional input for gene annotations, this can contain more
-information about the genes than just an ID number. The annotations will
-be available in the differential expression results table and the optional normalised counts table.
-
-Example:
-
-    ==========  ==========  ===================================================
-    **GeneID**  **Symbol**  **GeneName**
-    ----------  ----------  ---------------------------------------------------
-    1287        Pzp         pregnancy zone protein
-    1298        Aanat       arylalkylamine N-acetyltransferase
-    1302        Aatk        apoptosis-associated tyrosine kinase
-    1303        Abca1       ATP-binding cassette, sub-family A (ABC1), member 1
-    1304        Abca4       ATP-binding cassette, sub-family A (ABC1), member 4
-    1305        Abca2       ATP-binding cassette, sub-family A (ABC1), member 2
-    ==========  ==========  ===================================================
-
 **Contrasts of Interest:**
 The contrasts you wish to make between levels.
 A common contrast would be a simple difference between two levels: "Mut-WT"
@@ -675,6 +751,7 @@
 Optionally, under **Output Options** you can choose to output
 
     * a normalised counts table
+    * the R script used by this tool
     * an RData file
 
 -----