changeset 6:2f8ddef8d545 draft

update deseq2
author mingchen0919
date Tue, 07 Nov 2017 13:50:32 -0500
parents fd3514267506
children 466053167103
files DESeq.Rmd DESeq.xml DESeq_render.R DESeq_results.Rmd DESeq_results.xml DESeq_results_render.R DESeq_visualization.Rmd DESeq_visualization.xml DESeq_visualization_render.R
diffstat 9 files changed, 318 insertions(+), 718 deletions(-) [+]
line wrap: on
line diff
--- a/DESeq.Rmd	Tue Aug 08 15:06:40 2017 -0400
+++ b/DESeq.Rmd	Tue Nov 07 13:50:32 2017 -0500
@@ -10,24 +10,22 @@
 
 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
 knitr::opts_chunk$set(
-  echo = ECHO
+  echo = ECHO,
+  error = TRUE
 )
-
-library(stringi)
-library(DESeq2)
-library(pheatmap)
-# library(PoiClaClu)
-library(RColorBrewer)
 ```
 
 # `DESeqDataSet` object
 
-```{r}
-count_files = strsplit(opt$count_files, ',')[[1]]
+```{r 'DESeqDataSet object'}
+count_file_paths = strsplit(opt$count_file_paths, ',')[[1]]
+count_file_names = strsplit(opt$count_file_names, ',')[[1]]
 sample_table = read.table(opt$sample_table, header = TRUE)
+row.names(sample_table) = sample_table[,2]
+sample_table = sample_table[count_file_names, ]
 
 ## copy count files into working directory
-file_copy = file.copy(count_files, sample_table$fileName, overwrite = TRUE)
+file_copy = file.copy(count_file_paths, count_file_names, overwrite = TRUE)
 
 ## DESeqDataSet object
 dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table,
@@ -55,14 +53,14 @@
 
 ## Count Data
 
-```{r}
+```{r 'count data'}
 datatable(head(counts(dds), 100), style="bootstrap", 
           class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
 ```
 
 ## Sample Table 
 
-```{r}
+```{r 'sample table'}
 datatable(sample_table, style="bootstrap",
           class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
 ```
--- a/DESeq.xml	Tue Aug 08 15:06:40 2017 -0400
+++ b/DESeq.xml	Tue Nov 07 13:50:32 2017 -0500
@@ -1,10 +1,10 @@
 <tool id="DESeq" name="DESeq2: DESeq" version="1.0.0">
     <requirements>
         <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
-        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
         <requirement type="package" version="1.20.0">r-getopt</requirement>
         <requirement type="package" version="1.2">r-rmarkdown</requirement>
         <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
         <requirement type="package" version="1.1.0">r-stringr</requirement>
         <requirement type="package" version="0.4.0">r-highcharter</requirement>
         <requirement type="package" version="0.2">r-dt</requirement>
@@ -18,46 +18,52 @@
         An R Markdown tool to perform DESeq analysis.
     </description>
     <stdio>
-        <regex match="Execution halted"
-               source="both"
-               level="fatal"
-               description="Execution halted." />
-        <regex match="Error in"
-               source="both"
-               level="fatal"
-               description="An undefined error occured, please check your intput carefully and contact your administrator." />
-        <regex match="Fatal error"
-               source="both"
-               level="fatal"
-               description="An undefined error occured, please check your intput carefully and contact your administrator." />
+        <!--redirecting stderr to a file. "XXX" is used to match with nothing so that tool running won't be interrupted during testing-->
+        <regex match="XXX"
+               source="stderr"
+               level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
     </stdio>
     <command>
         <![CDATA[
 
         Rscript '${__tool_directory__}/DESeq_render.R'
 
+
             ## 1. input data
             -e $echo
-            -c $count_files
-            -s $sample_table
-            -p "$design_formula"
+            ##----- code chunk to get file paths and raw file names for a multiple inputs data field ----
+            #set $sep = ''
+            #set $count_file_paths = ''
+            #set $count_file_names = ''
+            #for $count_file in $count_files:
+                #set $count_file_paths += $sep + str($count_file)
+                #set $count_file_names += $sep + str($count_file.name)
+                #set $sep = ','
+            #end for
+            ##----------------- end for getting file names and file paths ------------------------------
+            -P '$count_file_paths'
+            -N '$count_file_names'
+            -S $sample_table
+            -p '$design_formula'
 
             ## 2. output report and report site directory
-		    -o $DESeq
-		    -d $DESeq.files_path
+		    -r $report
+		    -d $report.files_path
+		    -s $sink_message
 		    -w $deseq_workspace
 
-		    ## 3. Rmd templates sitting in the tool directory
-
-		        ## _site.yml and index.Rmd template files
-                -D '${__tool_directory__}/DESeq.Rmd'
+		    ## 3. Rmd templates from the tool directory
+            -t '${__tool_directory__}/DESeq.Rmd'
 
 
 
         ]]>
     </command>
     <inputs>
-        <param type="data" name="count_files" format="tabular" multiple="true" label="Count files from htseq-count" />
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Display analysis code in report?"/>
+        <param type="data" name="count_files" format="tabular" multiple="true" label="Count files from htseq-count"/>
         <param type="data" name="sample_table" format="tabular" multiple="false" label="sample table file"
                help="The sample table file contains a table. The first column is the sample name, the second column is
                     the count file name and the rest of columns are treatment columns. The file names in this table have
@@ -72,11 +78,11 @@
                 </valid>
             </sanitizer>
         </param>
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
     </inputs>
     <outputs>
-        <data name="DESeq" format="html" label="DESeq Analysis" />
-        <data name="deseq_workspace" format="rdata" label="R workspace: DESeq analysis" />
+        <data name="report" format="html" label="DESeq Analysis on ${on_string}"/>
+        <data format="txt" name="sink_message" label="Warnings and Errors on ${on_string}" from_work_dir="warnings_and_errors.txt"/>
+        <data name="deseq_workspace" format="rdata" label="R workspace: DESeq analysis on ${on_string}"/>
     </outputs>
     <citations>
         <citation>
@@ -94,7 +100,8 @@
         <citation type="bibtex">
             @article{allaire2016rmarkdown,
             title={rmarkdown: Dynamic Documents for R, 2016},
-            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
             journal={R package version 0.9},
             volume={6},
             year={2016}
@@ -110,21 +117,15 @@
             }
         </citation>
         <citation>
-            @misc{pheatmap2015,
-            title = {pheatmap: Pretty Heatmaps},
-            author = {Raivo Kolde},
-            year = {2015},
-            note = {R package version 1.0.8},
-            url = {https://CRAN.R-project.org/package=pheatmap},
-            }
-        </citation>
-        <citation>
-            @misc{dt2016,
-            title = {DT: A Wrapper of the JavaScript Library 'DataTables'},
-            author = {Yihui Xie},
-            year = {2016},
-            note = {R package version 0.2},
-            url = {https://CRAN.R-project.org/package=DT},
+            @article{love2014moderated,
+            title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
+            author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
+            journal={Genome biology},
+            volume={15},
+            number={12},
+            pages={550},
+            year={2014},
+            publisher={BioMed Central}
             }
         </citation>
     </citations>
--- a/DESeq_render.R	Tue Aug 08 15:06:40 2017 -0400
+++ b/DESeq_render.R	Tue Nov 07 13:50:32 2017 -0500
@@ -1,90 +1,70 @@
-##======= Handle arguments from command line ========
-# setup R error handline to go to stderr
-options(show.error.messages=FALSE,
-error=function(){
-    cat(geterrmessage(), file=stderr())
-    quit("no", 1, F)
-})
-
-# we need that to not crash galaxy with an UTF8 error on German LC settings.
-loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
-
-# suppress warning
-options(warn = -1)
-
-options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
-args = commandArgs(trailingOnly=TRUE)
-
-suppressPackageStartupMessages({
-    library(getopt)
-    library(tools)
-})
+library(getopt)
+library(rmarkdown)
+library(htmltools)
+library(dplyr)
+library(stringi)
+library(DESeq2)
+library(pheatmap)
+library(RColorBrewer)
+library(DT)
 
-# column 1: the long flag name
-# column 2: the short flag alias. A SINGLE character string
-# column 3: argument mask
-#           0: no argument
-#           1: argument required
-#           2: argument is optional
-# column 4: date type to which the flag's argument shall be cast.
-#           possible values: logical, integer, double, complex, character.
-spec_list=list()
-
-##------- 1. input data ---------------------
-spec_list$COUNT_FILES = c('count_files', 'c', '1', 'character')
-spec_list$ECHO = c('echo', 'e', '1', 'character')
-spec_list$SAMPLE_TABLE = c('sample_table', 's', '1', 'character')
-spec_list$DESIGN_FORMULA = c('design_formula', 'p', '1', 'character')
-
-##--------2. output report and report site directory --------------
-spec_list$OUTPUT_HTML = c('deseq_html', 'o', '1', 'character')
-spec_list$OUTPUT_DIR = c('deseq_dir', 'd', '1', 'character')
-spec_list$WORKSPACE = c('deseq_workspace', 'w', '1', 'character')
-
-##--------3. Rmd templates sitting in the tool directory ----------
-
-spec_list$DESEQ_RMD = c('deseq_rmd', 'D', '1', 'character')
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+  ##---------below is the code for rendering .Rmd templates-----
+  
+  ##=============STEP 1: handle command line arguments==========
+  ##
+  ##============================================================
+  # column 1: the long flag name
+  # column 2: the short flag alias. A SINGLE character string
+  # column 3: argument mask
+  #           0: no argument
+  #           1: argument required
+  #           2: argument is optional
+  # column 4: date type to which the flag's argument shall be cast.
+  #           possible values: logical, integer, double, complex, character.
+  #-------------------------------------------------------------
+  #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++
+  # 1. short flag alias should match the flag in the command section in the XML file.
+  # 2. long flag name can be any legal R variable names
+  # 3. two names in args_list can have common string but one name should not be a part of another name.
+  #    for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems.
+  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  args_list=list()
+  ##------- 1. input data ---------------------
+  args_list$ECHO = c('echo', 'e', '1', 'character')
+  args_list$COUNT_FILE_PATHS = c('count_file_paths', 'P', '1', 'character')
+  args_list$COUNT_FILE_NAMES = c('count_file_names', 'N', '1', 'character')
+  args_list$SAMPLE_TABLE = c('sample_table', 'S', '1', 'character')
+  args_list$DESIGN_FORMULA = c('design_formula', 'p', '1', 'character')
+  ##--------2. output report and outputs --------------
+  args_list$REPORT_HTML = c('report_html', 'r', '1', 'character')
+  args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character')
+  args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character')
+  args_list$WORKSPACE = c('deseq_workspace', 'w', '1', 'character')
+  ##--------3. .Rmd templates in the tool directory ----------
+  args_list$deseq_RMD = c('deseq_rmd', 't', '1', 'character')
+  ##-----------------------------------------------------------
+  opt = getopt(t(as.data.frame(args_list)))
 
 
-
-##------------------------------------------------------------------
-
-spec = t(as.data.frame(spec_list))
-opt = getopt(spec)
-# arguments are accessed by long flag name (the first column in the spec matrix)
-#                        NOT by element name in the spec_list
-# example: opt$help, opt$expression_file
-##====== End of arguments handling ==========
-
-#------ Load libraries ---------
-library(rmarkdown)
-library(plyr)
-library(stringr)
-library(dplyr)
-library(highcharter)
-library(DT)
-library(reshape2)
-# library(Kmisc)
-library(plotly)
-library(formattable)
-library(htmltools)
-
-
-#----- 1. create the report directory ------------------------
-system(paste0('mkdir -p ', opt$deseq_dir))
-
-
-#----- 2. generate Rmd files with Rmd templates --------------
-#   a. templates without placeholder variables:
-#         copy templates from tool directory to the working directory.
-#   b. templates with placeholder variables:
-#         substitute variables with user input values and place them in the working directory.
-
-
-#----- 01 DESeq.Rmd -----------------------
-readLines(opt$deseq_rmd) %>%
+  
+  ##=======STEP 2: create report directory (optional)==========
+  ##
+  ##===========================================================
+  dir.create(opt$report_dir)
+  
+  ##=STEP 3: replace placeholders in .Rmd with argument values=
+  ##
+  ##===========================================================
+  #++ need to replace placeholders with args values one by one+
+  readLines(opt$deseq_rmd) %>%
     (function(x) {
-        gsub('ECHO', opt$echo, x)
+      gsub('ECHO', opt$echo, x)
     }) %>%
     (function(x) {
       gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x)
@@ -93,21 +73,21 @@
       gsub('DESIGN_FORMULA', opt$design_formula, x)
     }) %>%
     (function(x) {
-        gsub('OUTPUT_DIR', opt$deseq_dir, x)
+      gsub('REPORT_DIR', opt$report_dir, x)
     }) %>%
     (function(x) {
-        fileConn = file('DESeq.Rmd')
-        writeLines(x, con=fileConn)
-        close(fileConn)
+      fileConn = file('deseq.Rmd')
+      writeLines(x, con=fileConn)
+      close(fileConn)
     })
+  
+
+  ##=============STEP 4: render .Rmd templates=================
+  ##
+  ##===========================================================
+  render('deseq.Rmd', output_file = opt$report_html)
 
 
-#------ 3. render all Rmd files --------
-render('DESeq.Rmd', output_file = opt$deseq_html)
-
-
-#-------4. manipulate outputs -----------------------------
-# document file
-# file.copy('DESeq.html', opt$deseq_html, recursive = TRUE)
-
-
+  ##--------end of code rendering .Rmd templates----------------
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
--- a/DESeq_results.Rmd	Tue Aug 08 15:06:40 2017 -0400
+++ b/DESeq_results.Rmd	Tue Nov 07 13:50:32 2017 -0500
@@ -10,17 +10,14 @@
 
 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
 knitr::opts_chunk$set(
-  echo = ECHO
+  echo = ECHO,
+  error = TRUE
 )
-
-library(DESeq2)
-library(pheatmap)
-library(genefilter)
 ```
 
-# Import workspace
 
 ```{r eval=TRUE}
+# Import workspace
 fcp = file.copy("DESEQ_WORKSPACE", "deseq.RData")
 load("deseq.RData")
 ```
@@ -30,9 +27,11 @@
 ## Result table
 
 ```{r}
-group = colnames(sample_table)[CONTRAST_GROUP]
-res <- results(dds, contrast = c(group, 'TREATMENT_LEVEL', 'CONDITION_LEVEL'))
-datatable(as.data.frame(res), style="bootstrap", filter = 'top',
+cat('--- View the top 100 rows of the result table ---')
+res <- results(dds, contrast = c('CONTRAST_FACTOR', 'TREATMENT_LEVEL', 'CONDITION_LEVEL'))
+write.csv(as.data.frame(res), file = 'deseq_results.csv')
+res_df = as.data.frame(res)[1:100, ]
+datatable(res_df, style="bootstrap", filter = 'top',
           class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
 ```
 
@@ -45,16 +44,10 @@
 
 # MA-plot {.tabset}
 
-## Shrinked with `lfcShrink()` function
 
-```{r eval=FALSE}
-shrink_res = DESeq2::lfcShrink(dds, contrast = c(group, 'TREATMENT_LEVEL', 'CONDITION_LEVEL'), res=res)
-plotMA(shrink_res)
-```
-
-## Shrinked with Bayesian procedure
 
 ```{r}
+cat('--- Shrinked with Bayesian procedure ---')
 plotMA(res)
 ```
 
@@ -68,11 +61,11 @@
 ```
 
 
-# Gene clustering
+# Visualization {.tabset}
+## Gene clustering
 
 ```{r}
-group_index = as.numeric(strsplit("CLUSTERING_GROUPS", ',')[[1]])
-clustering_groups = colnames(sample_table)[group_index]
+clustering_groups = strsplit("CLUSTERING_FACTORS", ',')[[1]]
 
 topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20)
 mat  <- assay(rld)[ topVarGenes, ]
@@ -83,3 +76,34 @@
 pheatmap(mat, annotation_col = annotation_col)
 ```
 
+## Sample-to-sample distance
+
+```{r}
+sampleDistMatrix <- as.matrix( sampleDists )
+colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
+pheatmap(sampleDistMatrix,
+         clustering_distance_cols = sampleDists,
+         col = colors)
+```
+
+## PCA plot 
+
+```{r}
+plotPCA(rld, intgroup = clustering_groups)
+```
+
+## MDS plot {.tabset}
+
+### Data table
+```{r}
+mds <- as.data.frame(colData(rld))  %>%
+         cbind(cmdscale(sampleDistMatrix))
+knitr::kable(mds)
+```
+
+### Plot
+```{r}
+ggplot(mds, aes(x = `1`, y = `2`, col = time)) +
+  geom_point(size = 3) + coord_fixed()
+```
+
--- a/DESeq_results.xml	Tue Aug 08 15:06:40 2017 -0400
+++ b/DESeq_results.xml	Tue Nov 07 13:50:32 2017 -0500
@@ -1,10 +1,10 @@
-<tool id="DESeq_results" name="DESeq2: Results" version="1.0.0">
+<tool id="DESeq_results" name="DESeq2: Results" version="2.0.0">
     <requirements>
         <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
-        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
         <requirement type="package" version="1.20.0">r-getopt</requirement>
         <requirement type="package" version="1.2">r-rmarkdown</requirement>
         <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
         <requirement type="package" version="1.1.0">r-stringr</requirement>
         <requirement type="package" version="0.4.0">r-highcharter</requirement>
         <requirement type="package" version="0.2">r-dt</requirement>
@@ -18,18 +18,11 @@
         An R Markdown tool to display DESeq analysis.
     </description>
     <stdio>
-        <regex match="Execution halted"
-               source="both"
-               level="fatal"
-               description="Execution halted." />
-        <regex match="Error in"
-               source="both"
-               level="fatal"
-               description="An undefined error occured, please check your intput carefully and contact your administrator." />
-        <regex match="Fatal error"
-               source="both"
-               level="fatal"
-               description="An undefined error occured, please check your intput carefully and contact your administrator." />
+        <!--redirecting stderr to a file. "XXX" is used to match with nothing so that tool running won't be interrupted during testing-->
+        <regex match="XXX"
+               source="stderr"
+               level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
     </stdio>
     <command>
         <![CDATA[
@@ -38,53 +31,45 @@
 
             ## 1. input data
             -e $echo
-            -w $deseq_workspace
-            -c "$contrast_group"
-            -t $treatment
-            -k $condition
+            -W $deseq_workspace
+            -C '$contrast_factor'
+            -T '$treatment'
+            -K '$condition'
 
-            #set $groups = []
-            #for $c_group in $clustering_groups
-                #if str($c_group.group)
-                    #set $groups = $groups + [str($c_group.group)]
-                #end if
-            #end for
-            #set $groups = ','.join($groups)
-            -m "$groups"
+            -M '$clustering_factors'
 
             ## 2. output report and report site directory
-		    -o $deseq_results
-		    -d $deseq_results.files_path
+		    -r $report
+            -d $report.files_path
+            -s $sink_message
+            -R $deseq_results
 
 		    ## 3. Rmd templates sitting in the tool directory
-
-		        ## _site.yml and index.Rmd template files
-                -D '${__tool_directory__}/DESeq_results.Rmd'
+            -t '${__tool_directory__}/DESeq_results.Rmd'
 
 
 
         ]]>
     </command>
     <inputs>
-        <param type="data" name="deseq_workspace" format="rdata" multiple="false" label="Workspace from tool DESeq2: DESeq" />
-        <param type="data" name="sample_table" format="tabular" multiple="false" label="Sample table file" />
-        <param type="data_column" name="contrast_group" data_ref="sample_table" use_header_names="true"
-               optional="false"
-               label="Group for result contrast"
-               help=""/>
-        <param type="text" name="treatment" label="Treatment level" />
-        <param type="text" name="condition" label="Condition level" />
-
-        <repeat name="clustering_groups" title="Gene clustering groups" min="1">
-            <param type="data_column" name="group" data_ref="sample_table" use_header_names="true"
-                   optional="false"
-                   label="A phenotype column from the sample table" />
-        </repeat>
-
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Display analysis code in report?"/>
+        <param type="data" name="deseq_workspace" format="rdata" multiple="false" optional="false"
+               label="Workspace from tool DESeq2: DESeq"/>
+        <param type="text" name="contrast_factor" label="Factor" optional="false"
+               help="the name of a factor in the design formula"/>
+        <param type="text" name="treatment" label="Treatment level" optional="false"
+               help=" the name of the numerator level for the fold change"/>
+        <param type="text" name="condition" label="Condition level" optional="false"
+               help=" the name of the denominator level for the fold change"/>
+        <param type="text" name="clustering_factors" title="Gene clustering factors" optional="false"
+               label="factors of interest for clustering samples and PCA plot"
+               help="A single factor or multiple factors from the design formula. Multiple factors are separated by comma (,)."/>
     </inputs>
     <outputs>
-        <data name="deseq_results" format="html" label="DESeq Results" />
+        <data format="html" name="report" label="DESeq results report on ${on_string}" />
+        <data format="txt" name="sink_message" label="Warnings and Errors on ${on_string}" from_work_dir="warnings_and_errors.txt"/>
+        <data format="csv" name="deseq_results" label="DESeq results on ${on_string}" from_work_dir="deseq_results.csv" />
     </outputs>
     <citations>
         <citation>
@@ -102,7 +87,8 @@
         <citation type="bibtex">
             @article{allaire2016rmarkdown,
             title={rmarkdown: Dynamic Documents for R, 2016},
-            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
             journal={R package version 0.9},
             volume={6},
             year={2016}
@@ -118,21 +104,15 @@
             }
         </citation>
         <citation>
-            @misc{pheatmap2015,
-            title = {pheatmap: Pretty Heatmaps},
-            author = {Raivo Kolde},
-            year = {2015},
-            note = {R package version 1.0.8},
-            url = {https://CRAN.R-project.org/package=pheatmap},
-            }
-        </citation>
-        <citation>
-            @misc{dt2016,
-            title = {DT: A Wrapper of the JavaScript Library 'DataTables'},
-            author = {Yihui Xie},
-            year = {2016},
-            note = {R package version 0.2},
-            url = {https://CRAN.R-project.org/package=DT},
+            @article{love2014moderated,
+            title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
+            author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
+            journal={Genome biology},
+            volume={15},
+            number={12},
+            pages={550},
+            year={2014},
+            publisher={BioMed Central}
             }
         </citation>
     </citations>
--- a/DESeq_results_render.R	Tue Aug 08 15:06:40 2017 -0400
+++ b/DESeq_results_render.R	Tue Nov 07 13:50:32 2017 -0500
@@ -1,120 +1,108 @@
-##======= Handle arguments from command line ========
-# setup R error handline to go to stderr
-options(show.error.messages=FALSE,
-        error=function(){
-          cat(geterrmessage(), file=stderr())
-          quit("no", 1, F)
-        })
-
-# we need that to not crash galaxy with an UTF8 error on German LC settings.
-loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
-
-# suppress warning
-options(warn = -1)
-
-options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
-args = commandArgs(trailingOnly=TRUE)
-
-suppressPackageStartupMessages({
-  library(getopt)
-  library(tools)
-})
+library(getopt)
+library(rmarkdown)
+library(htmltools)
+library(dplyr)
+library(DESeq2)
+library(pheatmap)
+library(genefilter)
+library(DT)
+library(stringi)
+library(RColorBrewer)
+library(ggplot2)
 
-# column 1: the long flag name
-# column 2: the short flag alias. A SINGLE character string
-# column 3: argument mask
-#           0: no argument
-#           1: argument required
-#           2: argument is optional
-# column 4: date type to which the flag's argument shall be cast.
-#           possible values: logical, integer, double, complex, character.
-spec_list=list()
-
-##------- 1. input data ---------------------
-spec_list$ECHO = c('echo', 'e', '1', 'character')
-spec_list$DESEQ_WORKSPACE = c('deseq_workspace', 'w', '1', 'character')
-spec_list$SAMPLE_TABLE = c('sample_table', 's', '1', 'character')
-spec_list$CONTRAST_GROUP = c('contrast_group', 'c', '1', 'character')
-spec_list$TREATMENT_LEVEL = c('treatment_level', 't', '1', 'character')
-spec_list$CONDITION_LEVEL = c('condition_level', 'k', '1', 'character')
-spec_list$CLUSTERING_GROUPS = c('clustering_groups', 'm', '1', 'character')
-
-##--------2. output report and report site directory --------------
-spec_list$OUTPUT_HTML = c('deseq_results_html', 'o', '1', 'character')
-spec_list$OUTPUT_DIR = c('deseq_results_dir', 'd', '1', 'character')
-
-##--------3. Rmd templates sitting in the tool directory ----------
-
-spec_list$DESEQ_VISUALIZATION_RMD = c('deseq_results_rmd', 'D', '1', 'character')
-
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+  ##---------below is the code for rendering .Rmd templates-----
+  
+  ##=============STEP 1: handle command line arguments==========
+  ##
+  ##============================================================
+  # column 1: the long flag name
+  # column 2: the short flag alias. A SINGLE character string
+  # column 3: argument mask
+  #           0: no argument
+  #           1: argument required
+  #           2: argument is optional
+  # column 4: date type to which the flag's argument shall be cast.
+  #           possible values: logical, integer, double, complex, character.
+  #-------------------------------------------------------------
+  #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++
+  # 1. short flag alias should match the flag in the command section in the XML file.
+  # 2. long flag name can be any legal R variable names
+  # 3. two names in args_list can have common string but one name should not be a part of another name.
+  #    for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems.
+  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  args_list=list()
+  ##------- 1. input data ---------------------
+  args_list$ECHO = c('echo', 'e', '1', 'character')
+  args_list$DESEQ_WORKSPACE = c('deseq_workspace', 'W', '1', 'character')
+  args_list$CONTRAST_FACTOR = c('contrast_factor', 'C', '1', 'character')
+  args_list$TREATMENT_LEVEL = c('treatment_level', 'T', '1', 'character')
+  args_list$CONDITION_LEVEL = c('condition_level', 'K', '1', 'character')
+  args_list$CLUSTERING_FACTORS = c('clustering_factors', 'M', '1', 'character')
+  ##--------2. output report and outputs --------------
+  args_list$REPORT_HTML = c('report_html', 'r', '1', 'character')
+  args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character')
+  args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character')
+  args_list$DESEQ_RESULTS = c('deseq_results', 'R', '1', 'character')
+  ##--------3. .Rmd templates in the tool directory ----------
+  args_list$deseq_results_RMD = c('deseq_results_rmd', 't', '1', 'character')
+  ##-----------------------------------------------------------
+  opt = getopt(t(as.data.frame(args_list)))
 
 
-##------------------------------------------------------------------
-
-spec = t(as.data.frame(spec_list))
-opt = getopt(spec)
-# arguments are accessed by long flag name (the first column in the spec matrix)
-#                        NOT by element name in the spec_list
-# example: opt$help, opt$expression_file
-##====== End of arguments handling ==========
+  
+  ##=======STEP 2: create report directory (optional)==========
+  ##
+  ##===========================================================
+  dir.create(opt$report_dir)
+  
+  ##=STEP 3: replace placeholders in .Rmd with argument values=
+  ##
+  ##===========================================================
+  #++ need to replace placeholders with args values one by one+
+  readLines(opt$deseq_results_rmd) %>%
+    (function(x) {
+      gsub('ECHO', opt$echo, x)
+    }) %>%
+    (function(x) {
+      gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x)
+    }) %>%
+    (function(x) {
+      gsub('CONTRAST_FACTOR', opt$contrast_factor, x)
+    }) %>%
+    (function(x) {
+      gsub('TREATMENT_LEVEL', opt$treatment_level, x)
+    }) %>%
+    (function(x) {
+      gsub('CONDITION_LEVEL', opt$condition_level, x)
+    }) %>%
+    (function(x) {
+      gsub('CLUSTERING_FACTORS', opt$clustering_factors, x)
+    }) %>%
+    (function(x) {
+      gsub('REPORT_DIR', opt$report_dir, x)
+    }) %>%
+    (function(x) {
+      gsub('DESEQ_RESULTS', opt$deseq_results, x)
+    }) %>%
+    (function(x) {
+      fileConn = file('deseq_results.Rmd')
+      writeLines(x, con=fileConn)
+      close(fileConn)
+    })
+  
 
-#------ Load libraries ---------
-library(rmarkdown)
-library(plyr)
-library(stringr)
-library(dplyr)
-library(highcharter)
-library(DT)
-library(reshape2)
-# library(Kmisc)
-library(plotly)
-library(formattable)
-library(htmltools)
-
-
-#----- 1. create the report directory ------------------------
-system(paste0('mkdir -p ', opt$deseq_results_dir))
-
-
-#----- 2. generate Rmd files with Rmd templates --------------
-#   a. templates without placeholder variables:
-#         copy templates from tool directory to the working directory.
-#   b. templates with placeholder variables:
-#         substitute variables with user input values and place them in the working directory.
+  ##=============STEP 4: render .Rmd templates=================
+  ##
+  ##===========================================================
+  render('deseq_results.Rmd', output_file = opt$report_html)
 
 
-#----- 01 DESeq_results.Rmd -----------------------
-readLines(opt$deseq_results_rmd) %>%
-  (function(x) {
-    gsub('ECHO', opt$echo, x)
-  }) %>%
-  (function(x) {
-    gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x)
-  }) %>%
-  (function(x) {
-    gsub('CONTRAST_GROUP', opt$contrast_group, x)
-  }) %>%
-  (function(x) {
-    gsub('TREATMENT_LEVEL', opt$treatment_level, x)
-  }) %>%
-  (function(x) {
-    gsub('CONDITION_LEVEL', opt$condition_level, x)
-  }) %>%
-  (function(x) {
-    gsub('CLUSTERING_GROUPS', opt$clustering_groups, x)
-  }) %>%
-  (function(x) {
-    gsub('OUTPUT_DIR', opt$deseq_results_dir, x)
-  }) %>%
-  (function(x) {
-    fileConn = file('DESeq_results.Rmd')
-    writeLines(x, con=fileConn)
-    close(fileConn)
-  })
-
-
-#------ 3. render all Rmd files --------
-render('DESeq_results.Rmd', output_file = opt$deseq_results_html)
-
-
-#-------4. manipulate outputs -----------------------------
+  ##--------end of code rendering .Rmd templates----------------
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
--- a/DESeq_visualization.Rmd	Tue Aug 08 15:06:40 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,114 +0,0 @@
----
-title: 'DESeq2: Visualization'
-output:
-    html_document:
-      number_sections: true
-      toc: true
-      theme: cosmo
-      highlight: tango
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = ECHO
-)
-
-library(stringi)
-library(DESeq2)
-library(pheatmap)
-# library(PoiClaClu)
-library(RColorBrewer)
-```
-
-# Import workspace
-
-```{r eval=TRUE}
-fcp = file.copy("DESEQ_WORKSPACE", "deseq.RData")
-load("deseq.RData")
-```
-
-# Visualization
-
-## Heatmaps of sample-to-sample distances {.tabset}
-
-### rlog-transformed values
-
-```{r}
-sampleDistMatrix <- as.matrix( sampleDists )
-colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
-pheatmap(sampleDistMatrix,
-         # clustering_distance_rows = sampleDists,
-         clustering_distance_cols = sampleDists,
-         col = colors)
-```
-
-### Poisson Distance
-
-```{r eval=FALSE}
-count_t = t(counts(dds))
-rownames(count_t) = colnames(counts(dds))
-poisd <- PoissonDistance(count_t)
-samplePoisDistMatrix <- as.matrix( poisd$dd )
-rownames(samplePoisDistMatrix) = rownames(count_t)
-colnames(samplePoisDistMatrix) = rownames(count_t)
-pheatmap(samplePoisDistMatrix,
-         # clustering_distance_rows = poisd$dd,
-         clustering_distance_cols = poisd$dd,
-         col = colors)
-```
-
-
-## PCA plots {.tabset}
-
-### Using `plotPCA()` function 
-
-```{r}
-# interest groups
-col_index = as.numeric(strsplit("INTGROUPS_PCA", ',')[[1]])
-intgroup_pca = colnames(sample_table)[col_index]
-```
-
-```{r}
-plotPCA(rld, intgroup = intgroup_pca)
-```
-
-
-### Using *ggplot2*
-
-```{r}
-pcaData <- plotPCA(rld, intgroup = intgroup_pca, returnData = TRUE)
-percentVar <- round(100 * attr(pcaData, "percentVar"))
-ggplot(pcaData, aes(x = PC1, y = PC2, color = time)) +
-  geom_point(size =3) +
-  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
-  ylab(paste0("PC2: ", percentVar[2], "% variance")) +
-  coord_fixed()
-```
-
-### PCA data table
-
-```{r}
-knitr::kable(pcaData)
-```
-
-
-## MDS plots {.tabset}
-
-### Using rlog-transformed values
-
-```{r}
-mds <- as.data.frame(colData(rld))  %>%
-         cbind(cmdscale(sampleDistMatrix))
-mds
-ggplot(mds, aes(x = `1`, y = `2`, col = time)) +
-  geom_point(size = 3) + coord_fixed()
-```
-
-### Using the *Poisson Distance*
-
-```{r eval=FALSE}
-mdsPois <- as.data.frame(colData(dds)) %>%
-   cbind(cmdscale(samplePoisDistMatrix))
-ggplot(mdsPois, aes(x = `1`, y = `2`, col = time)) +
-  geom_point(size = 3) + coord_fixed()
-```
--- a/DESeq_visualization.xml	Tue Aug 08 15:06:40 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,145 +0,0 @@
-<tool id="DESeq_visualization" name="DESeq2: Visualization" version="1.0.0">
-    <requirements>
-        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
-        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
-        <requirement type="package" version="1.20.0">r-getopt</requirement>
-        <requirement type="package" version="1.2">r-rmarkdown</requirement>
-        <requirement type="package" version="1.8.4">r-plyr</requirement>
-        <requirement type="package" version="1.1.0">r-stringr</requirement>
-        <requirement type="package" version="0.4.0">r-highcharter</requirement>
-        <requirement type="package" version="0.2">r-dt</requirement>
-        <requirement type="package" version="1.4.2">r-reshape2</requirement>
-        <requirement type="package" version="4.5.6">r-plotly</requirement>
-        <requirement type="package" version="0.2.0.1">r-formattable</requirement>
-        <requirement type="package" version="0.3.5">r-htmltools</requirement>
-        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
-    </requirements>
-    <description>
-        An R Markdown tool to visualize DESeq analysis results.
-    </description>
-    <stdio>
-        <regex match="Execution halted"
-               source="both"
-               level="fatal"
-               description="Execution halted." />
-        <regex match="Error in"
-               source="both"
-               level="fatal"
-               description="An undefined error occured, please check your intput carefully and contact your administrator." />
-        <regex match="Fatal error"
-               source="both"
-               level="fatal"
-               description="An undefined error occured, please check your intput carefully and contact your administrator." />
-    </stdio>
-    <command>
-        <![CDATA[
-
-        Rscript '${__tool_directory__}/DESeq_visualization_render.R'
-
-            ## 1. input data
-            -e $echo
-            -w $deseq_workspace
-            
-            #set $pca_groups = []
-            #for $group in $intgroups_pca
-                #if str($group.intgroup)
-                    #set $pca_groups = $pca_groups + [str($group.intgroup)]
-                #end if
-            #end for
-            #set $pca_groups = ','.join($pca_groups)
-            -p "$pca_groups"
-            
-            #set $mds_groups = []
-            #for $group in $intgroups_mds
-                #if str($group.intgroup)
-                    #set $mds_groups = $mds_groups + [str($group.intgroup)]
-                #end if
-            #end for
-            #set $mds_groups = ','.join($mds_groups)
-            -m "$mds_groups"
-
-            ## 2. output report and report site directory
-		    -o $deseq_visualization
-		    -d $deseq_visualization.files_path
-
-
-		    ## 3. Rmd templates sitting in the tool directory
-
-		        ## _site.yml and index.Rmd template files
-                -D '${__tool_directory__}/DESeq_visualization.Rmd'
-
-
-
-        ]]>
-    </command>
-    <inputs>
-        <param type="data" name="deseq_workspace" format="rdata" multiple="false" label="Workspace from tool DESeq2: DESeq" />
-        <param type="data" name="sample_table" format="tabular" multiple="false" label="Sample table file" />
-        <repeat name="intgroups_pca" title="Interest groups for PCA plot" min="1">
-            <param type="data_column" name="intgroup" data_ref="sample_table" use_header_names="true"
-                   optional="false"
-                   label="Interest group for PCA plot"
-                   help=""/>
-        </repeat>
-        <repeat name="intgroups_mds" title="Interest groups for MDS plot" min="1">
-            <param type="data_column" name="intgroup" data_ref="sample_table" use_header_names="true"
-                   optional="false"
-                   label="Interest group for MDS plot"
-                   help=""/>
-        </repeat>
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
-    </inputs>
-    <outputs>
-        <data name="deseq_visualization" format="html" label="DESeq Visualization" />
-    </outputs>
-    <citations>
-        <citation>
-            @article{love2014moderated,
-            title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
-            author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
-            journal={Genome biology},
-            volume={15},
-            number={12},
-            pages={550},
-            year={2014},
-            publisher={BioMed Central}
-            }
-        </citation>
-        <citation type="bibtex">
-            @article{allaire2016rmarkdown,
-            title={rmarkdown: Dynamic Documents for R, 2016},
-            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
-            journal={R package version 0.9},
-            volume={6},
-            year={2016}
-            }
-        </citation>
-        <citation type="bibtex">
-            @book{xie2015dynamic,
-            title={Dynamic Documents with R and knitr},
-            author={Xie, Yihui},
-            volume={29},
-            year={2015},
-            publisher={CRC Press}
-            }
-        </citation>
-        <citation>
-            @misc{pheatmap2015,
-            title = {pheatmap: Pretty Heatmaps},
-            author = {Raivo Kolde},
-            year = {2015},
-            note = {R package version 1.0.8},
-            url = {https://CRAN.R-project.org/package=pheatmap},
-            }
-        </citation>
-        <citation>
-            @misc{dt2016,
-            title = {DT: A Wrapper of the JavaScript Library 'DataTables'},
-            author = {Yihui Xie},
-            year = {2016},
-            note = {R package version 0.2},
-            url = {https://CRAN.R-project.org/package=DT},
-            }
-        </citation>
-    </citations>
-</tool>
\ No newline at end of file
--- a/DESeq_visualization_render.R	Tue Aug 08 15:06:40 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-##======= Handle arguments from command line ========
-# setup R error handline to go to stderr
-options(show.error.messages=FALSE,
-        error=function(){
-          cat(geterrmessage(), file=stderr())
-          quit("no", 1, F)
-        })
-
-# we need that to not crash galaxy with an UTF8 error on German LC settings.
-loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
-
-# suppress warning
-options(warn = -1)
-
-options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
-args = commandArgs(trailingOnly=TRUE)
-
-suppressPackageStartupMessages({
-  library(getopt)
-  library(tools)
-})
-
-# column 1: the long flag name
-# column 2: the short flag alias. A SINGLE character string
-# column 3: argument mask
-#           0: no argument
-#           1: argument required
-#           2: argument is optional
-# column 4: date type to which the flag's argument shall be cast.
-#           possible values: logical, integer, double, complex, character.
-spec_list=list()
-
-##------- 1. input data ---------------------
-spec_list$ECHO = c('echo', 'e', '1', 'character')
-spec_list$DESEQ_WORKSPACE = c('deseq_workspace', 'w', '1', 'character')
-spec_list$SAMPLE_TABLE = c('sample_table', 's', '1', 'character')
-spec_list$INTGROUPS_PCA = c('intgroups_pca', 'p', '1', 'character')
-spec_list$INTGROUPS_MDS = c('intgroups_mds', 'm', '1', 'character')
-
-##--------2. output report and report site directory --------------
-spec_list$OUTPUT_HTML = c('deseq_visualization_html', 'o', '1', 'character')
-spec_list$OUTPUT_DIR = c('deseq_visualization_dir', 'd', '1', 'character')
-
-##--------3. Rmd templates sitting in the tool directory ----------
-
-spec_list$DESEQ_VISUALIZATION_RMD = c('deseq_visualization_rmd', 'D', '1', 'character')
-
-
-
-##------------------------------------------------------------------
-
-spec = t(as.data.frame(spec_list))
-opt = getopt(spec)
-# arguments are accessed by long flag name (the first column in the spec matrix)
-#                        NOT by element name in the spec_list
-# example: opt$help, opt$expression_file
-##====== End of arguments handling ==========
-
-#------ Load libraries ---------
-library(rmarkdown)
-library(plyr)
-library(stringr)
-library(dplyr)
-library(highcharter)
-library(DT)
-library(reshape2)
-# library(Kmisc)
-library(plotly)
-library(formattable)
-library(htmltools)
-
-
-#----- 1. create the report directory ------------------------
-system(paste0('mkdir -p ', opt$deseq_visualization_dir))
-
-
-#----- 2. generate Rmd files with Rmd templates --------------
-#   a. templates without placeholder variables:
-#         copy templates from tool directory to the working directory.
-#   b. templates with placeholder variables:
-#         substitute variables with user input values and place them in the working directory.
-
-
-#----- 01 DESeq_visualization.Rmd -----------------------
-readLines(opt$deseq_visualization_rmd) %>%
-  (function(x) {
-    gsub('ECHO', opt$echo, x)
-  }) %>%
-  (function(x) {
-    gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x)
-  }) %>%
-  (function(x) {
-    gsub('INTGROUPS_PCA', opt$intgroups_pca, x)
-  }) %>%
-  (function(x) {
-    gsub('INTGROUPS_MDS', opt$intgroups_mds, x)
-  }) %>%
-  (function(x) {
-    gsub('OUTPUT_DIR', opt$deseq_visualization_dir, x)
-  }) %>%
-  (function(x) {
-    fileConn = file('DESeq_visualization.Rmd')
-    writeLines(x, con=fileConn)
-    close(fileConn)
-  })
-
-
-#------ 3. render all Rmd files --------
-render('DESeq_visualization.Rmd', output_file = opt$deseq_visualization_html)
-
-
-#-------4. manipulate outputs -----------------------------