changeset 2:a1634a9c2ee1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit 910ffba48cb5f981aad1e00b77056bbbec7f9617
author iuc
date Thu, 19 Apr 2018 17:26:38 -0400
parents 2a16413ec60d
children d79ed3ec25fe
files edger.R edger.xml test-data/edgeR_Mut-WT.tsv test-data/edgeR_Mut-WT_2fact.tsv test-data/edgeR_Mut-WT_2fact_anno.tsv test-data/edgeR_Mut-WT_anno.tsv test-data/edgeR_Mut-WT_filt.tsv test-data/edgeR_WT-Mut.tsv test-data/edgeR_WT-Mut_2fact_anno.tsv test-data/edgeR_normcounts.tsv test-data/edgeR_normcounts_anno.tsv test-data/out_rscript.txt
diffstat 12 files changed, 818 insertions(+), 81 deletions(-) [+]
line wrap: on
line diff
--- a/edger.R	Tue Jan 30 04:07:08 2018 -0500
+++ b/edger.R	Thu Apr 19 17:26:38 2018 -0400
@@ -486,9 +486,9 @@
     sumStatus <- summary(status)
 
     # Collect counts for differential expression
-    upCount[i] <- sumStatus["Up"]
-    downCount[i] <- sumStatus["Down"]
-    flatCount[i] <- sumStatus["NotSig"]
+    upCount[i] <- sumStatus["Up", ]
+    downCount[i] <- sumStatus["Down", ]
+    flatCount[i] <- sumStatus["NotSig", ]
                                              
     # Write top expressions table
     top <- topTags(res, n=Inf, sort.by="PValue")
--- a/edger.xml	Tue Jan 30 04:07:08 2018 -0500
+++ b/edger.xml	Thu Apr 19 17:26:38 2018 -0400
@@ -1,10 +1,11 @@
-<tool id="edger" name="edgeR" version="3.20.7.0">
+<tool id="edger" name="edgeR" version="3.20.7.1">
     <description>
         Perform differential expression of count data
     </description>
 
     <requirements>
         <requirement type="package" version="3.20.7">bioconductor-edger</requirement>
+        <requirement type="package" version="3.34.9">bioconductor-limma</requirement>
         <requirement type="package" version="0.2.15">r-rjson</requirement>
         <requirement type="package" version="1.20.0">r-getopt</requirement>
         <requirement type="package" version="1.4.30">r-statmod</requirement>
@@ -13,7 +14,7 @@
     </requirements>
 
     <version_command><![CDATA[
-echo $(R --version | grep version | grep -v GNU)", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scales version" $(R --vanilla --slave -e "library(scales); cat(sessionInfo()\$otherPkgs\$scales\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+echo $(R --version | grep version | grep -v GNU)", edgeR version" $(R --vanilla --slave -e "library(edgeR); cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", limma version" $(R --vanilla --slave -e "library(limma); cat(sessionInfo()\$otherPkgs\$limma\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scales version" $(R --vanilla --slave -e "library(scales); cat(sessionInfo()\$otherPkgs\$scales\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
     ]]></version_command>
 
     <command detect_errors="exit_code"><![CDATA[
@@ -94,6 +95,10 @@
 
 &&
 cp '$outReport.files_path'/*.tsv output_dir/
+
+#if $out.rscript:
+    && cp '$__tool_directory__/edger.R' '$rscript'
+#end if
     ]]></command>
 
     <inputs>
@@ -222,6 +227,7 @@
                 label="Output Normalised Counts Table?"
                 help="Output a file containing the normalised counts, these are in log2 counts per million (logCPM). Default: No">
             </param>
+            <param name="rscript" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used will be provided as a text file in the output. Default: No"/>
             <param name="rdaOption" type="boolean" truevalue="1" falsevalue="0" checked="false"
                 label="Output RData file?"
                 help="Output all the data used by R to construct the plots and tables, can be loaded into R. A link to the RData file will be provided in the HTML report. Default: No">
@@ -259,6 +265,9 @@
         <collection name="outTables" type="list" label="${tool.name} on ${on_string}: Tables">
             <discover_datasets pattern="(?P&lt;name&gt;.+)\.tsv$" format="tabular" directory="output_dir" visible="false" />
         </collection>
+        <data name="rscript" format="txt" label="${tool.name} on ${on_string}: Rscript">
+            <filter>out['rscript']</filter>
+        </data>
     </outputs>
 
     <tests>
@@ -278,8 +287,18 @@
             </repeat>
             <param name="normalisationOption" value="TMM" />
             <output_collection name="outTables" count="2">
-                <element name="edgeR_Mut-WT" ftype="tabular" file="edgeR_Mut-WT.tsv" />
-                <element name="edgeR_WT-Mut" ftype="tabular" file="edgeR_WT-Mut.tsv" />
+                <element name="edgeR_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*0.4582" />
+                   </assert_contents>
+               </element>
+                <element name="edgeR_WT-Mut" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*-0.4582" />
+                   </assert_contents>
+               </element>
             </output_collection>
             <output name="outReport" >
                 <assert_contents>
@@ -305,12 +324,18 @@
             </repeat>
             <param name="normalisationOption" value="TMM" />
             <output_collection name="outTables" count="1">
-                <element name="edgeR_Mut-WT" ftype="tabular" file="edgeR_Mut-WT_anno.tsv" />
+                <element name="edgeR_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="EntrezID.*Symbol.*logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*Abca4.*0.4582" />
+                   </assert_contents>
+               </element>
             </output_collection>
         </test>
-        <!-- Ensure RData file can be output -->
+        <!-- Ensure RScript and RData file can be output -->
         <test>
             <param name="format" value="matrix" />
+            <param name="rscript" value="True"/>
             <param name="rdaOption" value="true" />
             <param name="counts" value="matrix.txt" />
             <repeat name="rep_factor">
@@ -326,6 +351,7 @@
                     <has_text text="RData" />
                 </assert_contents>
             </output>
+            <output name="rscript" value="out_rscript.txt"/>
         </test>
         <!-- Ensure secondary factors work -->
         <test>
@@ -344,7 +370,12 @@
             </repeat>
             <param name="normalisationOption" value="TMM" />
             <output_collection name="outTables" count="1" >
-                <element name="edgeR_Mut-WT" ftype="tabular" file="edgeR_Mut-WT_2fact.tsv" />
+                <element name="edgeR_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*0.4584" />
+                   </assert_contents>
+               </element>
             </output_collection>
         </test>
         <!-- Ensure factors file input works -->
@@ -358,7 +389,12 @@
             </repeat>
             <param name="normalisationOption" value="TMM" />
             <output_collection name="outTables" count="1">
-                <element name="edgeR_Mut-WT" ftype="tabular" file="edgeR_Mut-WT_2fact.tsv" />
+                <element name="edgeR_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*0.4584" />
+                   </assert_contents>
+               </element>
             </output_collection>
         </test>
         <!-- Ensure normalised counts file output works-->
@@ -375,8 +411,18 @@
             </repeat>
             <param name="normalisationOption" value="TMM" />
             <output_collection name="outTables" count="2">
-                <element name="edgeR_Mut-WT" ftype="tabular" file="edgeR_Mut-WT.tsv" />
-                <element name="edgeR_normcounts" ftype="tabular" file="edgeR_normcounts.tsv" />
+                <element name="edgeR_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*0.4582" />
+                   </assert_contents>
+               </element>
+                <element name="edgeR_normcounts" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*Mut1.*Mut2.*Mut3.*WT1.*WT2.*WT3" />
+                        <has_text_matching expression="11304.*15.7535" />
+                   </assert_contents>
+               </element>
             </output_collection>
         </test>
         <!-- Ensure likelihood ratio option works -->
@@ -438,9 +484,24 @@
             </repeat>
             <param name="normCounts" value="true" />
             <output_collection name="outTables" count="3">
-                <element name="edgeR_Mut-WT" ftype="tabular" file="edgeR_Mut-WT_2fact_anno.tsv" />
-                <element name="edgeR_WT-Mut" ftype="tabular" file="edgeR_WT-Mut_2fact_anno.tsv" />
-                <element name="edgeR_normcounts" ftype="tabular" file="edgeR_normcounts_anno.tsv" />
+                <element name="edgeR_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="EntrezID.*Symbol.*logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*Abca4.*0.4584" />
+                   </assert_contents>
+               </element>
+                <element name="edgeR_WT-Mut" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*Abca4.*-0.4584" />
+                   </assert_contents>
+               </element>
+                <element name="edgeR_normcounts" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="Mut1.*Mut2.*Mut3.*WT1.*WT2.*WT3" />
+                        <has_text_matching expression="11304.*Abca4.*15.7535" />
+                   </assert_contents>
+               </element>
             </output_collection>
         </test>
         <!-- Ensure filtering on CPM in Mnimum Samples works -->
@@ -469,7 +530,13 @@
                 </assert_contents>
             </output>
             <output_collection name="outTables" count="1" >
-                <element name="edgeR_Mut-WT" ftype="tabular" file="edgeR_Mut-WT_filt.tsv" />
+                <element name="edgeR_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*0.4568" />
+                        <not_has_text text="-0.0682" />
+                   </assert_contents>
+               </element>
             </output_collection>
         </test>
         <!-- Ensure filtering on Count in Minmum Samples works -->
@@ -497,7 +564,14 @@
                 </assert_contents>
             </output>
             <output_collection name="outTables" count="1" >
-                <element name="edgeR_Mut-WT" ftype="tabular" file="edgeR_Mut-WT_filt.tsv" />
+                <element name="edgeR_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*0.4568" />
+                        <not_has_text text="-0.0682" />
+                   </assert_contents>
+               </element>
+
             </output_collection>
         </test>
         <!-- Ensure filtering on Total Count works -->
@@ -527,7 +601,13 @@
                 </assert_contents>
             </output>
             <output_collection name="outTables" count="1" >
-                <element name="edgeR_Mut-WT" ftype="tabular" file="edgeR_Mut-WT_filt.tsv" />
+                <element name="edgeR_Mut-WT" ftype="tabular" >
+                    <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR" />
+                        <has_text_matching expression="11304.*0.4568" />
+                        <not_has_text text="-0.0682" />
+                   </assert_contents>
+               </element>
             </output_collection>
         </test>
     </tests>
@@ -716,6 +796,7 @@
 Optionally, under **Output Options** you can choose to output
 
     * a normalised counts table
+    * the R script used by this tool
     * an RData file
 
 -----
--- a/test-data/edgeR_Mut-WT.tsv	Tue Jan 30 04:07:08 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-"GeneID"	"logFC"	"logCPM"	"F"	"PValue"	"FDR"
-"11304"	0.458203001410391	15.530162861746	32.6285109553746	6.943370724917e-06	4.1660224349502e-05
-"11287"	0.188840644104212	17.6536729774735	20.5671667733158	0.000135453949597801	0.000406361848793403
-"11298"	-0.138359578382475	17.6815280107154	10.8470695851279	0.00306012801564425	0.00612025603128849
-"11303"	-0.0561156581317604	17.8897677663033	1.50815092591008	0.231329593888878	0.346994390833318
-"11305"	-0.0579340818829784	18.1615839598046	1.09689306676368	0.305382540289637	0.366459048347564
-"11302"	-0.0682406105165454	10.0898264751075	0.137130529665157	0.884266488139469	0.884266488139469
--- a/test-data/edgeR_Mut-WT_2fact.tsv	Tue Jan 30 04:07:08 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-"GeneID"	"logFC"	"logCPM"	"F"	"PValue"	"FDR"
-"11287"	0.189281291475186	17.6499778192954	198.646314971919	7.90598427634242e-09	4.74359056580545e-08
-"11298"	-0.13798041694802	17.6843133699537	96.2224552671758	4.15830411749776e-06	1.24749123524933e-05
-"11304"	0.458490715244216	15.526484673111	14.5864146735617	0.00244295799161999	0.00488591598323999
-"11303"	-0.0560600217169691	17.8909334307093	6.53006937817236	0.0442859767053646	0.0664289650580469
-"11305"	-0.0585095825423414	18.1629882429457	1.07140336604322	0.32103822810743	0.385245873728915
-"11302"	-0.0716631320244627	10.0898336653124	0.376796260569999	0.878304702615846	0.878304702615846
--- a/test-data/edgeR_Mut-WT_2fact_anno.tsv	Tue Jan 30 04:07:08 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-"EntrezID"	"Symbol"	"GeneName"	"Chr"	"Length"	"logFC"	"logCPM"	"F"	"PValue"	"FDR"
-11287	"Pzp"	"pregnancy zone protein"	6	4681	0.189281947498313	17.6499778192954	198.646315096405	7.90598424818915e-09	4.74359054891349e-08
-11298	"Aanat"	"arylalkylamine N-acetyltransferase"	11	1455	-0.137980416947824	17.6843133699537	96.2224553233548	4.15830411749738e-06	1.24749123524921e-05
-11304	"Abca4"	"ATP-binding cassette, sub-family A (ABC1), member 4"	3	7248	0.45849071524422	15.526484673111	14.5864146737822	0.00244295799149183	0.00488591598298366
-11303	"Abca1"	"ATP-binding cassette, sub-family A (ABC1), member 1"	4	10260	-0.0560600215744048	17.8909334307093	6.53006938009001	0.0442859767053567	0.066428965058035
-11305	"Abca2"	"ATP-binding cassette, sub-family A (ABC1), member 2"	2	8061	-0.0585095828508861	18.1629882429457	1.07140336564628	0.321038228193371	0.385245873832045
-11302	"Aatk"	"apoptosis-associated tyrosine kinase"	11	5743	-0.0716631320197652	10.0898336653124	0.376796260576849	0.878304702615839	0.878304702615839
--- a/test-data/edgeR_Mut-WT_anno.tsv	Tue Jan 30 04:07:08 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-"EntrezID"	"Symbol"	"GeneName"	"Chr"	"Length"	"logFC"	"logCPM"	"F"	"PValue"	"FDR"
-11304	"Abca4"	"ATP-binding cassette, sub-family A (ABC1), member 4"	3	7248	0.458203001410391	15.530162861746	32.6285109553746	6.943370724917e-06	4.1660224349502e-05
-11287	"Pzp"	"pregnancy zone protein"	6	4681	0.188840644104212	17.6536729774735	20.5671667733158	0.000135453949597801	0.000406361848793403
-11298	"Aanat"	"arylalkylamine N-acetyltransferase"	11	1455	-0.138359578382475	17.6815280107154	10.8470695851279	0.00306012801564425	0.00612025603128849
-11303	"Abca1"	"ATP-binding cassette, sub-family A (ABC1), member 1"	4	10260	-0.0561156581317604	17.8897677663033	1.50815092591008	0.231329593888878	0.346994390833318
-11305	"Abca2"	"ATP-binding cassette, sub-family A (ABC1), member 2"	2	8061	-0.0579340818829784	18.1615839598046	1.09689306676368	0.305382540289637	0.366459048347564
-11302	"Aatk"	"apoptosis-associated tyrosine kinase"	11	5743	-0.0682406105165454	10.0898264751075	0.137130529665157	0.884266488139469	0.884266488139469
--- a/test-data/edgeR_Mut-WT_filt.tsv	Tue Jan 30 04:07:08 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-"GeneID"	"logFC"	"logCPM"	"F"	"PValue"	"FDR"
-"11287"	0.187201149217925	17.6526225386971	165.500659651998	5.18054239620105e-10	2.59027119810053e-09
-"11298"	-0.140077523013286	17.6838446963123	82.0496288033128	2.92613742709898e-06	7.31534356774746e-06
-"11304"	0.456820345055957	15.5288695886958	25.2675517854784	6.46433259176098e-05	0.00010773887652935
-"11303"	-0.0578468398229744	17.8912127135125	5.26103367901545	0.0384341523491632	0.048042690436454
-"11305"	-0.0593023205976883	18.1634104549086	0.864302521617601	0.363623540536245	0.363623540536245
--- a/test-data/edgeR_WT-Mut.tsv	Tue Jan 30 04:07:08 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-"GeneID"	"logFC"	"logCPM"	"F"	"PValue"	"FDR"
-"11304"	-0.458203001410391	15.530162861746	32.6285109553746	6.943370724917e-06	4.1660224349502e-05
-"11287"	-0.188840644104212	17.6536729774735	20.5671667733158	0.000135453949597801	0.000406361848793403
-"11298"	0.138359578382475	17.6815280107154	10.8470695851279	0.00306012801564425	0.00612025603128849
-"11303"	0.0561156581317604	17.8897677663033	1.50815092591008	0.231329593888878	0.346994390833318
-"11305"	0.0579340818829784	18.1615839598046	1.09689306676368	0.305382540289637	0.366459048347564
-"11302"	0.0682406105165454	10.0898264751075	0.137130529665157	0.884266488139469	0.884266488139469
--- a/test-data/edgeR_WT-Mut_2fact_anno.tsv	Tue Jan 30 04:07:08 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-"EntrezID"	"Symbol"	"GeneName"	"Chr"	"Length"	"logFC"	"logCPM"	"F"	"PValue"	"FDR"
-11287	"Pzp"	"pregnancy zone protein"	6	4681	-0.189281947498313	17.6499778192954	198.646315096405	7.90598424818915e-09	4.74359054891349e-08
-11298	"Aanat"	"arylalkylamine N-acetyltransferase"	11	1455	0.137980416947824	17.6843133699537	96.2224553233548	4.15830411749738e-06	1.24749123524921e-05
-11304	"Abca4"	"ATP-binding cassette, sub-family A (ABC1), member 4"	3	7248	-0.45849071524422	15.526484673111	14.5864146737822	0.00244295799149183	0.00488591598298366
-11303	"Abca1"	"ATP-binding cassette, sub-family A (ABC1), member 1"	4	10260	0.0560600215744048	17.8909334307093	6.53006938009001	0.0442859767053567	0.066428965058035
-11305	"Abca2"	"ATP-binding cassette, sub-family A (ABC1), member 2"	2	8061	0.0585095828508861	18.1629882429457	1.07140336564628	0.321038228193371	0.385245873832045
-11302	"Aatk"	"apoptosis-associated tyrosine kinase"	11	5743	0.0716631320197652	10.0898336653124	0.376796260576849	0.878304702615839	0.878304702615839
--- a/test-data/edgeR_normcounts.tsv	Tue Jan 30 04:07:08 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-"GeneID"	"Mut1"	"Mut2"	"Mut3"	"WT1"	"WT2"	"WT3"
-"11287"	17.7717801382127	17.7103668584544	17.7656984572699	17.6075444214943	17.5078565133576	17.5637960881114
-"11298"	17.6504754185442	17.55181161064	17.6142553019077	17.7726234935868	17.6985800110028	17.7597848438911
-"11302"	9.64041099082467	9.8551982993804	9.60469198931215	9.52851478148979	9.97869946791847	9.78190633986473
-"11303"	17.8772707356813	17.7864068634935	17.9114914356477	17.9125147871338	17.8772755854201	17.9551530504837
-"11304"	15.753577788623	15.8510977521242	15.6551142861549	15.3537170121875	15.2168364952853	15.3165751633072
-"11305"	18.0400277799982	18.1407817993511	18.2048423497925	18.1807759635442	18.1818136580236	18.2026167343562
--- a/test-data/edgeR_normcounts_anno.tsv	Tue Jan 30 04:07:08 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-"EntrezID"	"Symbol"	"GeneName"	"Chr"	"Length"	"Mut1"	"Mut2"	"Mut3"	"WT1"	"WT2"	"WT3"
-11287	"Pzp"	"pregnancy zone protein"	6	4681	17.7717801382127	17.7103668584544	17.7656984572699	17.6075444214943	17.5078565133576	17.5637960881114
-11298	"Aanat"	"arylalkylamine N-acetyltransferase"	11	1455	17.6504754185442	17.55181161064	17.6142553019077	17.7726234935868	17.6985800110028	17.7597848438911
-11302	"Aatk"	"apoptosis-associated tyrosine kinase"	11	5743	9.64041099082467	9.8551982993804	9.60469198931215	9.52851478148979	9.97869946791847	9.78190633986473
-11303	"Abca1"	"ATP-binding cassette, sub-family A (ABC1), member 1"	4	10260	17.8772707356813	17.7864068634935	17.9114914356477	17.9125147871338	17.8772755854201	17.9551530504837
-11304	"Abca4"	"ATP-binding cassette, sub-family A (ABC1), member 4"	3	7248	15.753577788623	15.8510977521242	15.6551142861549	15.3537170121875	15.2168364952853	15.3165751633072
-11305	"Abca2"	"ATP-binding cassette, sub-family A (ABC1), member 2"	2	8061	18.0400277799982	18.1407817993511	18.2048423497925	18.1807759635442	18.1818136580236	18.2026167343562
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_rscript.txt	Thu Apr 19 17:26:38 2018 -0400
@@ -0,0 +1,718 @@
+# This tool takes in a matrix of feature counts as well as gene annotations and
+# outputs a table of top expressions as well as various plots for differential
+# expression analysis
+#
+# ARGS: htmlPath", "R", 1, "character"      -Path to html file linking to other outputs
+#       outPath", "o", 1, "character"       -Path to folder to write all output to
+#       filesPath", "j", 2, "character"     -JSON list object if multiple files input
+#       matrixPath", "m", 2, "character"    -Path to count matrix
+#       factFile", "f", 2, "character"      -Path to factor information file
+#       factInput", "i", 2, "character"     -String containing factors if manually input  
+#       annoPath", "a", 2, "character"      -Path to input containing gene annotations
+#       contrastData", "C", 1, "character"  -String containing contrasts of interest
+#       cpmReq", "c", 1, "double"           -Float specifying cpm requirement
+#       cntReq", "z", 1, "integer"          -Integer specifying minimum total count requirement
+#       sampleReq", "s", 1, "integer"       -Integer specifying minimum number of samples requirement
+#       normCounts", "x", 0, "logical"      -String specifying if normalised counts should be output 
+#       rdaOpt", "r", 0, "logical"          -String specifying if RData should be output
+#       lfcReq", "l", 1, "double"           -Float specifying the log-fold-change requirement   
+#       pValReq", "p", 1, "double"          -Float specifying the p-value requirement
+#       pAdjOpt", "d", 1, "character"       -String specifying the p-value adjustment method 
+#       normOpt", "n", 1, "character"       -String specifying type of normalisation used 
+#       robOpt", "b", 0, "logical"          -String specifying if robust options should be used 
+#       lrtOpt", "t", 0, "logical"          -String specifying whether to perform LRT test instead 
+#
+# OUT: 
+#       MDS Plot 
+#       BCV Plot
+#       QL Plot
+#       MD Plot
+#       Expression Table
+#       HTML file linking to the outputs
+# Optional:
+#       Normalised counts Table
+#       RData file
+#
+# Author: Shian Su - registertonysu@gmail.com - Jan 2014
+# Modified by: Maria Doyle - Oct 2017 (some code taken from the DESeq2 wrapper)
+
+# Record the starting time as a character string
+timeStart <- as.character(Sys.time())
+
+# Send R errors to stderr: suppress the default message, print geterrmessage() to stderr, then quit with status 1 (NOTE(review): F is shorthand for FALSE)
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# Set LC_MESSAGES to en_US.UTF-8; we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# Load all required libraries
+library(methods, quietly=TRUE, warn.conflicts=FALSE)
+library(statmod, quietly=TRUE, warn.conflicts=FALSE)
+library(splines, quietly=TRUE, warn.conflicts=FALSE)
+library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
+library(limma, quietly=TRUE, warn.conflicts=FALSE)
+library(scales, quietly=TRUE, warn.conflicts=FALSE)
+library(getopt, quietly=TRUE, warn.conflicts=FALSE)
+
+################################################################################
+### Function Declaration
+################################################################################
+# Function to sanitise contrast equations: removes the spaces surrounding the
+# arithmetic operators (+, -, /, *) and any leading or trailing whitespace
+sanitiseEquation <- function(equation) {
+    equation <- gsub(" *[+] *", "+", equation)
+    equation <- gsub(" *[-] *", "-", equation)
+    equation <- gsub(" *[/] *", "/", equation)
+    equation <- gsub(" *[*] *", "*", equation)
+    equation <- gsub("^\\s+|\\s+$", "", equation)
+    return(equation)
+}
+
+# Function to sanitise group information: removes spaces around commas and trims leading/trailing whitespace
+sanitiseGroups <- function(string) {
+    string <- gsub(" *[,] *", ",", string)
+    string <- gsub("^\\s+|\\s+$", "", string)
+    return(string)
+}
+
+# Function to replace every literal period in a string with a space (fixed=TRUE, so "." is not treated as a regex)
+unmake.names <- function(string) {
+    string <- gsub(".", " ", string, fixed=TRUE)
+    return(string)
+}
+
+# Build an output file path by joining opt$outPath and filename with "/" (reads the global opt)
+makeOut <- function(filename) {
+    return(paste0(opt$outPath, "/", filename))
+}
+
+# Prefix a name with "factors$" (presumably to build design expressions referring to columns of factors - confirm at call sites)
+pasteListName <- function(string) {
+    return(paste0("factors$", string))
+}
+
+# Create cata function: cat with the default path set to opt$htmlPath, an empty default
+# separator and appending true by default (taken from the cat function with altered
+# defaults). The else chain belongs to the inner if, so a non-character file is used as given.
+cata <- function(..., file=opt$htmlPath, sep="", fill=FALSE, labels=NULL, 
+                                 append=TRUE) {
+    if (is.character(file)) 
+        if (file == "") 
+            file <- stdout()
+    else if (substring(file, 1L, 1L) == "|") {
+        file <- pipe(substring(file, 2L), "w")
+        on.exit(close(file))
+    }
+    else {
+        file <- file(file, ifelse(append, "a", "w"))
+        on.exit(close(file))
+    }
+    .Internal(cat(list(...), file, sep, fill, labels, append))
+}
+
+# Function to write an html <head> element containing the given title (written through cata)
+HtmlHead <- function(title) {
+    cata("<head>\n")
+    cata("<title>", title, "</title>\n")
+    cata("</head>\n")
+}
+
+# Function to write an html link with target="_blank" followed by a line break; label defaults to the address
+HtmlLink <- function(address, label=address) {
+    cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
+}
+
+# Function to write an html img tag; the alt text defaults to the source and height/width default to 600
+HtmlImage <- function(source, label=source, height=600, width=600) {
+    cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
+    cata("\" width=\"", width, "\"/>\n")
+}
+
+# Functions to write html list items (ListItem), table cells (TableItem) and table header cells (TableHeadItem)
+ListItem <- function(...) {
+    cata("<li>", ..., "</li>\n")
+}
+
+TableItem <- function(...) {
+    cata("<td>", ..., "</td>\n")
+}
+
+TableHeadItem <- function(...) {
+    cata("<th>", ..., "</th>\n")
+}
+
+################################################################################
+### Input Processing
+################################################################################
+
+# Collect the trailing arguments from the command line
+args <- commandArgs(trailingOnly=TRUE)
+
+# Get options from the default commandArgs(TRUE), using the spec matrix below. Each row is:
+# long name, short flag, argument mask (0 = none, 1 = required, 2 = optional), data type.
+spec <- matrix(c(
+    "htmlPath", "R", 1, "character",
+    "outPath", "o", 1, "character",
+    "filesPath", "j", 2, "character",
+    "matrixPath", "m", 2, "character",
+    "factFile", "f", 2, "character",
+    "factInput", "i", 2, "character",
+    "annoPath", "a", 2, "character",
+    "contrastData", "C", 1, "character",
+    "cpmReq", "c", 1, "double",
+    "totReq", "y", 0, "logical",
+    "cntReq", "z", 1, "integer",
+    "sampleReq", "s", 1, "integer",
+    "normCounts", "x", 0, "logical",
+    "rdaOpt", "r", 0, "logical",
+    "lfcReq", "l", 1, "double",
+    "pValReq", "p", 1, "double",
+    "pAdjOpt", "d", 1, "character",
+    "normOpt", "n", 1, "character",
+    "robOpt", "b", 0, "logical",
+    "lrtOpt", "t", 0, "logical"),
+    byrow=TRUE, ncol=4)
+opt <- getopt(spec)
+
+
+if (is.null(opt$matrixPath) & is.null(opt$filesPath)) {  # abort when neither input form was supplied (NOTE(review): && is the usual scalar form)
+    cat("A counts matrix (or a set of counts files) is required.\n")
+    q(status=1)
+}
+
+if (is.null(opt$cpmReq)) {  # filtCPM is TRUE only when cpmReq was supplied
+    filtCPM <- FALSE
+} else {
+    filtCPM <- TRUE
+}
+
+if (is.null(opt$cntReq) || is.null(opt$sampleReq)) {  # filtSmpCount needs both cntReq and sampleReq
+    filtSmpCount <- FALSE
+} else {
+    filtSmpCount <- TRUE
+}
+
+if (is.null(opt$totReq)) {  # filtTotCount is TRUE only when the totReq flag was given
+    filtTotCount <- FALSE
+} else {
+    filtTotCount <- TRUE
+}
+
+if (is.null(opt$lrtOpt)) {  # wantLRT is TRUE only when the lrtOpt flag was given
+    wantLRT <- FALSE
+} else {
+    wantLRT <- TRUE
+}
+
+if (is.null(opt$rdaOpt)) {  # wantRda is TRUE only when the rdaOpt flag was given
+    wantRda <- FALSE
+} else {
+    wantRda <- TRUE   
+}
+
+if (is.null(opt$annoPath)) {  # haveAnno is TRUE only when an annotation path was supplied
+    haveAnno <- FALSE
+} else {
+    haveAnno <- TRUE
+}
+
+if (is.null(opt$normCounts)) {  # wantNorm is TRUE only when the normCounts flag was given
+    wantNorm <- FALSE
+} else {   
+    wantNorm <- TRUE
+}
+
+if (is.null(opt$robOpt)) {  # wantRobust is TRUE only when the robOpt flag was given
+    wantRobust <- FALSE
+} else {
+    wantRobust <- TRUE
+}
+
+
+if (!is.null(opt$filesPath)) {
+    # Process the separate count files described by the JSON in opt$filesPath (adapted from DESeq2 wrapper)
+    library("rjson")
+    parser <- newJSONParser()
+    parser$addData(opt$filesPath)
+    factorList <- parser$getObject()
+    factors <- sapply(factorList, function(x) x[[1]])
+    filenamesIn <- unname(unlist(factorList[[1]][[2]]))
+    sampleTable <- data.frame(sample=basename(filenamesIn),
+                            filename=filenamesIn,
+                            row.names=filenamesIn,
+                            stringsAsFactors=FALSE)
+    for (factor in factorList) {
+        factorName <- factor[[1]]
+        sampleTable[[factorName]] <- character(nrow(sampleTable))
+        lvls <- sapply(factor[[2]], function(x) names(x))
+        for (i in seq_along(factor[[2]])) {
+            files <- factor[[2]][[i]][[1]]
+            sampleTable[files,factorName] <- lvls[i]
+        }
+        sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
+    }
+    rownames(sampleTable) <- sampleTable$sample
+    rem <- c("sample","filename")
+    factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE]
+    
+    # Read in each count file (first column as row names) and column-bind them into a single table
+    countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)})
+    counts <- do.call("cbind", countfiles)
+    
+} else {
+    # Process the single count matrix: the first column becomes the row names and is then dropped
+    counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
+    row.names(counts) <- counts[, 1]
+    counts <- counts[ , -1]
+    countsRows <- nrow(counts)
+
+    # Process factors: read them from the factor file unless they were manually input
+    if (is.null(opt$factInput)) {
+            factorData <- read.table(opt$factFile, header=TRUE, sep="\t")
+            factors <- factorData[, -1, drop=FALSE]
+    }  else {
+            factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
+            factorData <- list()
+            for (fact in factors) {
+                newFact <- unlist(strsplit(fact, split="::"))
+                factorData <- rbind(factorData, newFact)
+            } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
+
+            # Set the row names to be the name of the factor and delete first column
+            row.names(factorData) <- factorData[, 1]
+            factorData <- factorData[, -1]
+            factorData <- sapply(factorData, sanitiseGroups)
+            factorData <- sapply(factorData, strsplit, split=",")
+            factorData <- sapply(factorData, make.names)
+            # Transform factor data into data frame of R factor objects
+            factors <- data.frame(factorData)
+    }
+}
+
+# Read the gene annotation table when one was supplied
+if (haveAnno) {
+    geneanno <- read.table(file=opt$annoPath, sep="\t", header=TRUE,
+                           stringsAsFactors=FALSE)
+}
+
+# Create the output directory (warnings suppressed)
+dir.create(path=opt$outPath, showWarnings=FALSE)
+
+# Contrasts arrive as one comma-separated string: split them, sanitise each
+# equation and replace spaces with dots
+contrastData <- strsplit(opt$contrastData, split=",")
+contrastData <- sanitiseEquation(unlist(contrastData))
+contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
+
+# Output locations of the plots shared by all contrasts
+bcvOutPdf <- makeOut("bcvplot.pdf")
+bcvOutPng <- makeOut("bcvplot.png")
+qlOutPdf <- makeOut("qlplot.pdf")
+qlOutPng <- makeOut("qlplot.png")
+mdsOutPdf <- makeOut("mdsplot.pdf")
+mdsOutPng <- makeOut("mdsplot.png")
+
+# Per-contrast output locations: one MD plot (pdf + png) and one results table
+# for each contrast, stored as character vectors indexed by contrast.
+# seq_along() is used so the loop body never runs for a non-existent contrast
+# (1:length(x) would iterate over 1 and 0 if x were empty).
+mdOutPdf <- character()
+mdOutPng <- character()
+topOut <- character()
+for (i in seq_along(contrastData)) {
+    mdOutPdf[i] <- makeOut(paste0("mdplot_", contrastData[i], ".pdf"))
+    mdOutPng[i] <- makeOut(paste0("mdplot_", contrastData[i], ".png"))
+    topOut[i] <- makeOut(paste0("edgeR_", contrastData[i], ".tsv"))
+}
+
+# Remaining single-file outputs
+normOut <- makeOut("edgeR_normcounts.tsv")
+rdaOut <- makeOut("edgeR_analysis.RData")
+sessionOut <- makeOut("session_info.txt")
+
+# Empty two-column (Label, Link) tables that collect the links and the images
+# shown in the HTML report
+linkData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE)
+imageData <- linkData
+
+# Per-contrast tallies of up-regulated, down-regulated and unchanged genes
+upCount <- downCount <- flatCount <- numeric()
+
+################################################################################
+### Data Processing
+################################################################################
+
+# Bundle the counts with the gene annotation, or with the bare gene IDs when
+# no annotation was supplied
+data <- list(counts=counts)
+data$genes <- if (haveAnno) geneanno else data.frame(GeneID=row.names(counts))
+
+# When a filter criterion is set, drop genes that do not reach the required
+# cpm/counts in the required number of samples. Default is no filtering.
+preFilterCount <- nrow(data$counts)
+
+if (filtTotCount || filtSmpCount || filtCPM) {
+
+    # Exactly one rule is applied; precedence is total count, then per-sample
+    # count, then CPM
+    if (filtTotCount) {
+        keep <- rowSums(data$counts) >= opt$cntReq
+    } else if (filtSmpCount) {
+        keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
+    } else {
+        keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
+    }
+
+    data$genes <- data$genes[keep, , drop=FALSE]
+    data$counts <- data$counts[keep, ]
+}
+
+# Number of genes remaining, and number removed by the filter
+postFilterCount <- nrow(data$counts)
+filteredCount <- preFilterCount - postFilterCount
+
+# Build the per-sample annotation: one row per count column, with a sampleID column followed by the factor columns
+samplenames <- colnames(data$counts)
+sampleanno <- data.frame("sampleID"=samplenames, factors)
+
+
+# Generate the DGEList object "data" by filling in the samples table by hand and then setting the class
+data$samples <- sampleanno
+data$samples$lib.size <- colSums(data$counts)  # library size = column total of the (filtered) counts
+data$samples$norm.factors <- 1  # neutral starting value; recomputed later by calcNormFactors
+row.names(data$samples) <- colnames(data$counts)
+data <- new("DGEList", data)
+
+# Name rows of factors according to their sample
+row.names(factors) <- names(data$counts)
+factorList <- sapply(names(factors), pasteListName)  # pasteListName is defined elsewhere; presumably yields a term referring to the column inside `factors` - confirm
+
+formula <- "~0"  # "~0" requests a design without an intercept column
+for (i in 1:length(factorList)) {
+    formula <- paste(formula, factorList[i], sep="+")  # append each factor term: "~0+term1+term2..."
+}
+
+formula <- formula(formula)  # convert the string into a formula object (the variable shadows the function name)
+design <- model.matrix(formula)
+
+for (i in 1:length(factorList)) {
+    colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE)  # strip the term prefix from the design column names
+}
+
+# Compute the normalisation factors with the requested method, then estimate
+# the dispersions; robust=FALSE is estimateDisp's default, so passing the flag
+# through is the same as omitting it when robust settings were not requested
+data <- calcNormFactors(data, method=opt$normOpt)
+data <- estimateDisp(data, design=design, robust=wantRobust)
+
+# Build the contrast matrix from the sanitised contrast strings
+contrasts <- makeContrasts(contrasts=contrastData, levels=design)
+
+################################################################################
+### Data Output
+################################################################################
+
+# Plot MDS
+labels <- names(counts)
+# Colour samples by the first factor only. A character column is converted to
+# a factor first: as.numeric() on plain strings yields NA (no visible colour),
+# which happens whenever the factor table was built without string-to-factor
+# conversion. Factor and numeric columns are passed through unchanged.
+mdsGroups <- factors[, 1]
+if (is.character(mdsGroups)) {
+    mdsGroups <- factor(mdsGroups)
+}
+png(mdsOutPng, width=600, height=600)
+plotMDS(data, labels=labels, col=as.numeric(mdsGroups), cex=0.8, main="MDS Plot")
+imageData[1, ] <- c("MDS Plot", "mdsplot.png")
+invisible(dev.off())
+
+# PDF copy of the MDS plot (uncoloured, smaller labels)
+pdf(mdsOutPdf)
+plotMDS(data, labels=labels, cex=0.5)
+linkData[1, ] <- c("MDS Plot.pdf", "mdsplot.pdf")
+invisible(dev.off())
+
+# BCV Plot
+png(bcvOutPng, width=600, height=600)
+plotBCV(data, main="BCV Plot")
+imgName <- "BCV Plot"
+imgAddr <- "bcvplot.png"
+imageData <- rbind(imageData, c(imgName, imgAddr))
+invisible(dev.off())
+
+# PDF copy of the BCV plot
+pdf(bcvOutPdf)
+plotBCV(data, main="BCV Plot")
+linkName <- paste0("BCV Plot.pdf")
+linkAddr <- paste0("bcvplot.pdf")
+linkData <- rbind(linkData, c(linkName, linkAddr))
+invisible(dev.off())
+
+# Fit the model: quasi-likelihood by default, plain GLM fit when the
+# likelihood ratio test was requested
+if (!wantLRT) {
+
+    # robust=FALSE is glmQLFit's default, so the flag can be passed straight on
+    fit <- glmQLFit(data, design, robust=wantRobust)
+
+    # QL dispersion plot, embedded in the report as PNG
+    png(qlOutPng, width=600, height=600)
+    plotQLDisp(fit, main="QL Plot")
+    imgName <- "QL Plot"
+    imgAddr <- "qlplot.png"
+    imageData <- rbind(imageData, c(imgName, imgAddr))
+    invisible(dev.off())
+
+    # The same plot as a linked PDF
+    pdf(qlOutPdf)
+    plotQLDisp(fit, main="QL Plot")
+    linkName <- "QL Plot.pdf"
+    linkAddr <- "qlplot.pdf"
+    linkData <- rbind(linkData, c(linkName, linkAddr))
+    invisible(dev.off())
+
+} else {
+
+    fit <- glmFit(data, design)
+
+}
+
+# Optionally write the normalised counts (log2 CPM computed with normalised
+# library sizes), prefixed by the gene annotation columns
+if (wantNorm) {
+    normalisedCounts <- data.frame(data$genes,
+                                   cpm(data, log=TRUE, normalized.lib.sizes=TRUE))
+    write.table(normalisedCounts, file=normOut, sep="\t", row.names=FALSE)
+    linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
+}
+
+
+# Test every contrast, tally the significant genes, write the results table
+# and draw the MD plots. seq_along() avoids iterating over indices 1 and 0
+# should the contrast vector ever be empty.
+for (i in seq_along(contrastData)) {
+    # Use the test that matches the kind of fit produced above
+    if (wantLRT) {
+        res <- glmLRT(fit, contrast=contrasts[, i])
+    } else {
+        res <- glmQLFTest(fit, contrast=contrasts[, i])
+    }
+
+    # Classify each gene with the requested adjustment method, p-value
+    # threshold and log-fold-change threshold
+    status <- decideTestsDGE(res, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
+                             lfc=opt$lfcReq)
+    sumStatus <- summary(status)
+
+    # Collect counts for differential expression
+    upCount[i] <- sumStatus["Up", ]
+    downCount[i] <- sumStatus["Down", ]
+    flatCount[i] <- sumStatus["NotSig", ]
+
+    # Write top expressions table (all genes, sorted by p-value)
+    top <- topTags(res, n=Inf, sort.by="PValue")
+    write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
+
+    linkName <- paste0("edgeR_", contrastData[i], ".tsv")
+    linkAddr <- paste0("edgeR_", contrastData[i], ".tsv")
+    linkData <- rbind(linkData, c(linkName, linkAddr))
+
+    # Title and highlight colours shared by the PDF and PNG versions of the plot
+    mdTitle <- paste("MD Plot:", unmake.names(contrastData[i]))
+    mdCols <- alpha(c("firebrick", "blue"), 0.4)
+
+    # Plot MD (log ratios vs mean difference) using limma package
+    pdf(mdOutPdf[i])
+    limma::plotMD(res, status=status, main=mdTitle,
+                  col=mdCols, values=c("1", "-1"),
+                  xlab="Average Expression", ylab="logFC")
+
+    abline(h=0, col="grey", lty=2)
+
+    linkName <- paste0("MD Plot_", contrastData[i], ".pdf")
+    linkAddr <- paste0("mdplot_", contrastData[i], ".pdf")
+    linkData <- rbind(linkData, c(linkName, linkAddr))
+    invisible(dev.off())
+
+    # Same plot again as a PNG for embedding in the report
+    png(mdOutPng[i], height=600, width=600)
+    limma::plotMD(res, status=status, main=mdTitle,
+                  col=mdCols, values=c("1", "-1"),
+                  xlab="Average Expression", ylab="logFC")
+
+    abline(h=0, col="grey", lty=2)
+
+    imgName <- paste0("MD Plot_", contrastData[i], ".png")
+    imgAddr <- paste0("mdplot_", contrastData[i], ".png")
+    imageData <- rbind(imageData, c(imgName, imgAddr))
+    invisible(dev.off())
+}
+
+# One row per contrast with its up / unchanged / down gene counts
+sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
+row.names(sigDiff) <- contrastData
+
+# Optionally save the main analysis objects to an ASCII RData file; the
+# normalised counts are included only when they were computed
+if (wantRda) {
+    rdaObjects <- c("counts", "data", "status",
+                    if (wantNorm) "normalisedCounts",
+                    "labels", "factors", "fit", "res", "top", "contrasts", "design")
+    save(list=rdaObjects, file=rdaOut, ascii=TRUE)
+    linkData <- rbind(linkData, c("edgeR_analysis.RData", "edgeR_analysis.RData"))
+}
+
+# Write the R session information to its own file and link it in the report
+sessionText <- capture.output(sessionInfo())
+writeLines(sessionText, sessionOut)
+linkData <- rbind(linkData, c("Session Info", "session_info.txt"))
+
+# Note the finish time and derive the total run time, stripping the
+# "Time difference of " prefix from the printed difftime
+timeEnd <- as.character(Sys.time())
+timeTaken <- round(difftime(timeEnd, timeStart), digits=3)
+timeTaken <- gsub("Time difference of ", "", capture.output(timeTaken), fixed=TRUE)
+
+################################################################################
+### HTML Generation
+################################################################################
+
+# Start from an empty report file
+cat("", file=opt$htmlPath)
+
+cata("<html>\n")
+
+cata("<body>\n")
+cata("<h3>edgeR Analysis Output:</h3>\n")
+cata("Links to PDF copies of plots are in 'Plots' section below.<br />\n")
+
+# Embed every collected image in order; the MDS and BCV plots are always
+# present, so there are at least two rows
+for (imgIdx in seq_len(nrow(imageData))) {
+    HtmlImage(imageData$Link[imgIdx], imageData$Label[imgIdx])
+}
+
+cata("<h4>Differential Expression Counts:</h4>\n")
+
+# Header row: an empty corner cell followed by one heading per count column
+cata("<table border=\"1\" cellpadding=\"4\">\n")
+cata("<tr>\n")
+TableItem()
+for (colName in names(sigDiff)) {
+    TableHeadItem(colName)
+}
+cata("</tr>\n")
+
+# One row per contrast: readable contrast name, then its counts
+for (rowIdx in 1:nrow(sigDiff)) {
+    cata("<tr>\n")
+    TableHeadItem(unmake.names(row.names(sigDiff)[rowIdx]))
+    for (colIdx in 1:ncol(sigDiff)) {
+        TableItem(as.character(sigDiff[rowIdx, colIdx]))
+    }
+    cata("</tr>\n")
+}
+cata("</table>")
+
+# Links are grouped by file extension. The patterns escape the dot and anchor
+# at the end of the name: an unescaped "." matches any character, so a name
+# that merely contains e.g. "xpdf" or "xtsv" would be listed in the wrong
+# section.
+cata("<h4>Plots:</h4>\n")
+for (i in seq_len(nrow(linkData))) {
+    if (grepl("\\.pdf$", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+    }
+}
+
+cata("<h4>Tables:</h4>\n")
+for (i in seq_len(nrow(linkData))) {
+    if (grepl("\\.tsv$", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+    }
+}
+
+# The RData section only exists when an RData file was requested
+if (wantRda) {
+    cata("<h4>R Data Objects:</h4>\n")
+    for (i in seq_len(nrow(linkData))) {
+        if (grepl("\\.RData$", linkData$Link[i])) {
+            HtmlLink(linkData$Link[i], linkData$Label[i])
+        }
+    }
+}
+
+cata("<p>Alt-click links to download file.</p>\n")
+cata("<p>Click floppy disc icon associated history item to download ")
+cata("all files.</p>\n")
+cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n")
+
+cata("<h4>Additional Information</h4>\n")
+cata("<ul>\n")
+
+if (filtCPM || filtSmpCount || filtTotCount) {
+    # Describe the filter that was actually applied: the branches are tested in
+    # the same order as in the filtering step (total count, then per-sample
+    # count, then CPM), so the text cannot disagree with the filter when more
+    # than one flag is set.
+    if (filtTotCount) {
+        tempStr <- paste("Genes without more than", opt$cntReq,
+                         "counts, after summing counts for all samples, are insignificant",
+                         "and filtered out.")
+    } else if (filtSmpCount) {
+        tempStr <- paste("Genes without more than", opt$cntReq,
+                         "counts in at least", opt$sampleReq, "samples are insignificant",
+                         "and filtered out.")
+    } else if (filtCPM) {
+        # The option is cpmReq; the former opt$cmpReq was always NULL, which
+        # left the threshold out of the sentence
+        tempStr <- paste("Genes without more than", opt$cpmReq,
+                         "CPM in at least", opt$sampleReq, "samples are insignificant",
+                         "and filtered out.")
+    }
+
+    ListItem(tempStr)
+
+    # Share of genes removed, as a percentage rounded to two decimals
+    filterProp <- round(filteredCount/preFilterCount*100, digits=2)
+    tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
+                      "%) genes were filtered out for low expression.")
+    ListItem(tempStr)
+}
+# Normalisation method and statistical test used
+ListItem(opt$normOpt, " was the method used to normalise library sizes.")
+if (!wantLRT && wantRobust) {
+    ListItem("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
+} else if (!wantLRT) {
+    ListItem("The edgeR quasi-likelihood test was used.")
+} else {
+    ListItem("The edgeR likelihood ratio test was used.")
+}
+
+# Explain the significance criterion behind the MD plot highlighting; the
+# wording depends on the p-value adjustment method (other methods get no note)
+if (opt$pAdjOpt == "none") {
+    tempStr <- paste0("MD-Plot highlighted genes are significant at p-value ",
+                      "of ", opt$pValReq," and exhibit log2-fold-change of at ",
+                      "least ", opt$lfcReq, ".")
+    ListItem(tempStr)
+} else if (opt$pAdjOpt %in% c("BH", "BY")) {
+    tempStr <- paste0("MD-Plot highlighted genes are significant at FDR ",
+                      "of ", opt$pValReq," and exhibit log2-fold-change of at ",
+                      "least ", opt$lfcReq, ".")
+    ListItem(tempStr)
+} else if (opt$pAdjOpt == "holm") {
+    tempStr <- paste0("MD-Plot highlighted genes are significant at adjusted ",
+                      "p-value of ", opt$pValReq,"  by the Holm(1979) ",
+                      "method, and exhibit log2-fold-change of at least ",
+                      opt$lfcReq, ".")
+    ListItem(tempStr)
+}
+cata("</ul>\n")
+
+cata("<h4>Summary of experimental data:</h4>\n")
+
+cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\n")
+
+# Header row: sample ID, the primary factor, then any further factors
+cata("<table border=\"1\" cellpadding=\"3\">\n")
+cata("<tr>\n")
+TableHeadItem("SampleID")
+TableHeadItem(names(factors)[1], " (Primary Factor)")
+
+if (ncol(factors) > 1) {
+    for (i in names(factors)[-1]) {
+        TableHeadItem(i)
+    }
+}
+# Close the header row unconditionally; it used to be closed only when there
+# was more than one factor, leaving an unterminated <tr> otherwise
+cata("</tr>\n")
+
+# One row per sample with its level for every factor
+for (i in seq_len(nrow(factors))) {
+    cata("<tr>\n")
+    TableHeadItem(row.names(factors)[i])
+    for (j in seq_len(ncol(factors))) {
+        TableItem(as.character(unmake.names(factors[i, j])))
+    }
+    cata("</tr>\n")
+}
+cata("</table>")
+
+# Link to the recorded session information
+for (i in which(grepl("session_info", linkData$Link))) {
+    HtmlLink(linkData$Link[i], linkData$Label[i])
+}
+
+# Borderless table with the start time, end time and total run time
+cata("<table border=\"0\">\n")
+cata("<tr>\n")
+TableItem("Task started at:"); TableItem(timeStart)
+cata("</tr>\n")
+cata("<tr>\n")
+TableItem("Task ended at:"); TableItem(timeEnd)
+cata("</tr>\n")
+cata("<tr>\n")
+TableItem("Task run time:"); TableItem(timeTaken)
+# Close the last row (this used to emit a second opening <tr> instead)
+cata("</tr>\n")
+cata("</table>\n")
+
+cata("</body>\n")
+cata("</html>")