changeset 0:4275479ada3a draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
author mingchen0919
date Tue, 08 Aug 2017 12:35:50 -0400
parents
children 337fedd38522
files wgcna_construct_network.Rmd wgcna_construct_network.xml wgcna_construct_network_render.R wgcna_eigengene_visualization.Rmd wgcna_eigengene_visualization.xml wgcna_eigengene_visualization_render.R wgcna_preprocessing.Rmd wgcna_preprocessing.xml wgcna_preprocessing_render.R
diffstat 9 files changed, 999 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_construct_network.Rmd	Tue Aug 08 12:35:50 2017 -0400
@@ -0,0 +1,178 @@
+---
+title: 'WGCNA: construct network'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+```
+
+# Import workspace 
+
+This step imports workspace from the **WGCNA: preprocessing** step.
+
+```{r}
+fcp = file.copy("PREPROCESSING_WORKSPACE", "deseq.RData")
+load("deseq.RData")
+```
+
+
+# Processing outliers {.tabset}
+
+## Before removing outliers
+
+```{r}
+plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
+     cex.axis = 1, cex.main = 1, cex = 0.5)
+if(!is.na(HEIGHT_CUT)) {
+  # plot a line to show the cut
+  abline(h = HEIGHT_CUT, col = "red")
+  # determine cluster under the line
+  clust = cutreeStatic(sampleTree, cutHeight = HEIGHT_CUT, minSize = 10)
+  keepSamples = (clust==1)
+  expression_data = expression_data[keepSamples, ]
+}
+```
+
+## After removing outliers
+
+```{r}
+sampleTree = hclust(dist(expression_data), method = "average");
+plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="",
+     cex.axis = 1, cex.main = 1, cex = 0.5)
+```
+
+
+# Trait data {.tabeset}
+
+If trait data is provided, the first 100 rows from the data will be displayed here. A plot consisting of sample cluster dendrogram and trait heatmap will also be gerenated.
+
+## Trait data table
+
+```{r}
+trait_data = data.frame()
+if ("TRAIT_DATA" != 'None') {
+  trait_data = read.csv("TRAIT_DATA", header = TRUE, row.names = 1)
+  # form a data frame analogous to expression data that will hold the traits.
+  sample_names = rownames(expression_data)
+  trait_rows = match(sample_names, rownames(trait_data))
+  trait_data = trait_data[trait_rows, ]
+  datatable(head(trait_data, 100), style="bootstrap", filter = 'top',
+          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+}
+```
+
+## Dendrogram and heatmap
+
+```{r fig.align='center', fig.width=8, fig.height=9}
+if (nrow(trait_data) != 0) {
+  traitColors = numbers2colors(trait_data, signed = FALSE)
+  plotDendroAndColors(sampleTree, traitColors,
+                      groupLabels = names(trait_data),
+                      main = "Sample dendrogram and trait heatmap",
+                      cex.dendroLabels = 0.5)
+}
+```
+
+
+# The thresholding power
+
+```{r}
+powers = c(1:10, seq(12, 20, 2))
+soft_threshold = pickSoftThreshold(expression_data, powerVector = powers, verbose = 5)
+```
+
+```{r fig.align='center'}
+par(mfrow=c(1,2))
+plot(soft_threshold$fitIndices[,1], -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2],
+     xlab="Soft Threshold (power)",
+     ylab="Scale Free Topology Model Fit,signed R^2",type="n",
+     main = paste("Scale independence"),
+     cex.lab = 0.5);
+text(soft_threshold$fitIndices[,1], -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2],
+     labels=powers,cex=0.5,col="red");
+
+# calculate soft threshold power
+y = -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2]
+r2_cutoff = 0.9
+for(i in 1:length(powers)) {
+  if(y[i] > r2_cutoff) {
+    soft_threshold_power = soft_threshold$fitIndices[,1][i]
+    r2_cutoff_new = y[i]
+    break
+  } 
+  soft_threshold_power = soft_threshold$fitIndices[,1][length(powers)]
+}
+abline(h=r2_cutoff, col="red")
+abline(v=soft_threshold_power, col="blue")
+text(soft_threshold_power+1, r2_cutoff-0.1, 
+     paste0('R^2 cutoff = ', round(r2_cutoff_new,2)),
+     cex = 0.5, col = "red")
+
+plot(soft_threshold$fitIndices[,1], soft_threshold$fitIndices[,5],
+     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
+     main = paste("Mean connectivity"),
+     cex.lab = 0.5)
+text(soft_threshold$fitIndices[,1], soft_threshold$fitIndices[,5], labels=powers, cex=0.5,col="red")
+par(mfrow=c(1,1))
+```
+
+
+# Construct network 
+
+The gene network is constructed based on **soft threshold power = `r soft_threshold_power`**
+
+```{r}
+gene_network = blockwiseModules(expression_data, power = soft_threshold_power,
+                                TOMType = "unsigned", minModuleSize = 30,
+                                reassignThreshold = 0, mergeCutHeight = 0.25,
+                                numericLabels = TRUE, pamRespectsDendro = FALSE,
+                                verbose = 3)
+```
+
+
+# Gene modules {.tabset}
+
+## Idenfity gene modules 
+
+```{r}
+modules = table(gene_network$colors)
+n_modules = length(modules) - 1
+module_size_upper = modules[2]
+module_size_lower = modules[length(modules)]
+
+module_table = data.frame(model_label = c(0, 1:n_modules),
+                          gene_size = as.vector(modules))
+datatable(t(module_table))
+```
+
+The results above indicates that there are **`r n_modules` gene modules**, labeled 1 through `r length(n_modules)` in order of descending size. The largest module has **`r module_size_upper` genes**, and the smallest module has **`r module_size_lower` genes**. The label 0 is reserved for genes outside of all modules. 
+
+
+## Dendrogram and module plot
+
+```{r}
+# Convert labels to colors for plotting
+module_colors = labels2colors(gene_network$colors)
+# Plot the dendrogram and the module colors underneath
+plotDendroAndColors(gene_network$dendrograms[[1]], module_colors[gene_network$blockGenes[[1]]],
+                    "Module colors",
+                    dendroLabels = FALSE, hang = 0.03,
+                    addGuide = TRUE, guideHang = 0.05)
+```
+
+
+```{r echo=FALSE}
+# save workspace
+rm("opt")
+save(list=ls(all.names = TRUE), file='CONSTRUCT_NETWORK_WORKSPACE')
+```
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_construct_network.xml	Tue Aug 08 12:35:50 2017 -0400
@@ -0,0 +1,105 @@
+<tool id="wgcna_construct_network" name="WGCNA: construct network" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="0.4.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="1.51">r-wgcna</requirement>
+    </requirements>
+    <description>
+        Construct gene network.
+    </description>
+    <stdio>
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occured, please check your intput carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occured, please check your intput carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+
+        Rscript '${__tool_directory__}/wgcna_construct_network_render.R'
+
+            ## 1. input data
+            -e $echo
+            -w $preprocessing_workspace
+            -h '$height_cut'
+            -t $trait_data
+
+
+
+            ## 2. output report and report site directory
+		    -o $wgcna_construct_network
+		    -d $wgcna_construct_network.files_path
+		    -W $construct_network_workspace
+
+
+		    ## 3. Rmd templates in the tool directory
+
+		        ## _site.yml and index.Rmd template files
+                -M '${__tool_directory__}/wgcna_construct_network.Rmd'
+
+
+
+        ]]>
+    </command>
+    <inputs>
+        <param type="data" name="preprocessing_workspace" format="rdata" optional="false"
+               label="R workspace from WGCNA: preprocessing" />
+        <param type="float" name="height_cut" optional="true" label="Height"
+               help="Refer to the sample clustering plot from WGCNA: preprocessing and choose a height cut that will
+                    remove outliers. If there is not outlier, leave this field blank." />
+        <param type="data" name="trait_data" format="csv" optional="true"
+               label="Trait data"
+               help="If trait data is provided, a plot consisting of sample clustering and trait heatmap will
+                    be generated. This field is optional. "/>
+
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data name="wgcna_construct_network" format="html" label="WGCNA: construct_network" />
+        <data name="construct_network_workspace" format="rdata" label="R workspace: WGCNA construct_network" />
+    </outputs>
+    <citations>
+        <citation type="bibtex">
+            @article{langfelder2008wgcna,
+            title={WGCNA: an R package for weighted correlation network analysis},
+            author={Langfelder, Peter and Horvath, Steve},
+            journal={BMC bioinformatics},
+            volume={9},
+            number={1},
+            pages={559},
+            year={2008},
+            publisher={BioMed Central}
+            }
+        </citation>
+        <citation type="bibtex">
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        </citation>
+        <citation type="bibtex">
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_construct_network_render.R	Tue Aug 08 12:35:50 2017 -0400
@@ -0,0 +1,112 @@
+##======= Handle arguments from command line ========
+# setup R error handline to go to stderr
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          quit("no", 1, F)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: date type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list=list()
+
+##------- 1. input data ---------------------
+spec_list$ECHO = c('echo', 'e', '1', 'character')
+spec_list$PREPROCESSING_WORKSPACE = c('preprocessing_workspace', 'w', '1', 'character')
+spec_list$HEIGHT_CUT = c('height_cut', 'h', '2', 'double')
+spec_list$TRAIT_DATA = c('trait_data', 't', '2', 'character')
+
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML = c('wgcna_construct_network_html', 'o', '1', 'character')
+spec_list$OUTPUT_DIR = c('wgcna_construct_network_dir', 'd', '1', 'character')
+spec_list$CONSTRUCT_NETWORK_WORKSPACE = c('construct_network_workspace', 'W', '1', 'character')
+
+
+##--------3. Rmd templates in the tool directory ----------
+
+spec_list$WGCNA_PREPROCESSING_RMD = c('wgcna_construct_network_rmd', 'M', '1', 'character')
+
+
+
+##------------------------------------------------------------------
+
+spec = t(as.data.frame(spec_list))
+opt = getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$help, opt$expression_file
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(WGCNA)
+library(DT)
+library(htmltools)
+library(ggplot2)
+
+
+#----- 1. create the report directory ------------------------
+system(paste0('mkdir -p ', opt$wgcna_construct_network_dir))
+
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+
+#----- 01 wgcna_construct_network.Rmd -----------------------
+readLines(opt$wgcna_construct_network_rmd) %>%
+  (function(x) {
+    gsub('ECHO', opt$echo, x)
+  }) %>%
+  (function(x) {
+    gsub('PREPROCESSING_WORKSPACE', opt$preprocessing_workspace, x)
+  }) %>%
+  (function(x) {
+    gsub('HEIGHT_CUT', opt$height_cut, x)
+  }) %>%
+  (function(x) {
+    gsub('TRAIT_DATA', opt$trait_data, x)
+  }) %>%
+  (function(x) {
+    gsub('OUTPUT_DIR', opt$wgcna_construct_network_dir, x)
+  }) %>%
+  (function(x) {
+    gsub('CONSTRUCT_NETWORK_WORKSPACE', opt$construct_network_workspace, x)
+  }) %>%
+  (function(x) {
+    fileConn = file('wgcna_construct_network.Rmd')
+    writeLines(x, con=fileConn)
+    close(fileConn)
+  })
+
+
+#------ 3. render all Rmd files --------
+render('wgcna_construct_network.Rmd', output_file = opt$wgcna_construct_network_html)
+
+#-------4. manipulate outputs -----------------------------
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_eigengene_visualization.Rmd	Tue Aug 08 12:35:50 2017 -0400
@@ -0,0 +1,121 @@
+---
+title: 'WGCNA: eigengene visualization'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+```
+
+# Import workspace 
+
+This step imports workspace from the **WGCNA: construct network** step.
+
+```{r}
+fcp = file.copy("CONSTRUCT_NETWORK_WORKSPACE", "deseq.RData")
+load("deseq.RData")
+```
+
+
+# Gene modules {.tabset}
+
+```{r}
+if(!is.na(SOFT_THRESHOLD_POWER)) soft_threshold_power = SOFT_THRESHOLD_POWER
+```
+
+## Identify gene modules
+
+The gene network is constructed based on **soft threshold power = `r soft_threshold_power`**
+
+```{r}
+gene_network = blockwiseModules(expression_data, power = soft_threshold_power,
+                                TOMType = "unsigned", minModuleSize = 30,
+                                reassignThreshold = 0, mergeCutHeight = 0.25,
+                                numericLabels = TRUE, pamRespectsDendro = FALSE,
+                                verbose = 3)
+```
+
+
+```{r}
+modules = table(gene_network$colors)
+n_modules = length(modules) - 1
+module_size_upper = modules[2]
+module_size_lower = modules[length(modules)]
+
+module_table = data.frame(model_label = c(0, 1:n_modules),
+                          gene_size = as.vector(modules))
+datatable(t(module_table))
+```
+
+The results above indicates that there are **`r n_modules` gene modules**, labeled 1 through `r length(n_modules)` in order of descending size. The largest module has **`r module_size_upper` genes**, and the smallest module has **`r module_size_lower` genes**. The label 0 is reserved for genes outside of all modules. 
+
+
+## Dendrogram and module plot
+
+```{r}
+# Convert labels to colors for plotting
+module_colors = labels2colors(gene_network$colors)
+# Plot the dendrogram and the module colors underneath
+plotDendroAndColors(gene_network$dendrograms[[1]], module_colors[gene_network$blockGenes[[1]]],
+                    "Module colors",
+                    dendroLabels = FALSE, hang = 0.03,
+                    addGuide = TRUE, guideHang = 0.05)
+```
+
+
+# Gene module correlation
+
+We can calculate eigengenes and use them as representative profiles to quantify similarity of found gene modules.
+
+```{r}
+n_genes = ncol(expression_data)
+n_samples = nrow(expression_data)
+```
+
+```{r}
+diss_tom = 1-TOMsimilarityFromExpr(expression_data, power = soft_threshold_power)
+set.seed(123)
+select_genes = sample(n_genes, size = PLOT_GENES)
+select_diss_tom = diss_tom[select_genes, select_genes]
+
+# calculate gene tree on selected genes
+select_gene_tree = hclust(as.dist(select_diss_tom), method = 'average')
+select_module_colors = module_colors[select_genes]
+
+# transform diss_tom with a power to make moderately strong connections more visiable in the heatmap.
+plot_diss_tom = select_diss_tom^7
+# set diagonal to NA for a nicer plot
+diag(plot_diss_tom) = NA
+```
+
+
+```{r fig.align='center'}
+TOMplot(plot_diss_tom, select_gene_tree, select_module_colors, main = "Network heatmap")
+```
+
+
+# Eigengene visualization {.tabset}
+
+## Eigengene dendrogram
+
+```{r fig.align='center'}
+module_eigengenes = moduleEigengenes(expression_data, module_colors)$eigengenes
+plotEigengeneNetworks(module_eigengenes, "Eigengene dendrogram", 
+                      plotHeatmaps = FALSE)
+```
+
+## Eigengene adjacency heatmap
+
+```{r fig.align='center'}
+plotEigengeneNetworks(module_eigengenes, "Eigengene adjacency heatmap", 
+                      marHeatmap = c(2, 3, 2, 2),
+                      plotDendrograms = FALSE, xLabelsAngle = 90)
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_eigengene_visualization.xml	Tue Aug 08 12:35:50 2017 -0400
@@ -0,0 +1,100 @@
+<tool id="wgcna_eigengene_visualization" name="WGCNA: eigengene visualization" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="0.4.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="1.51">r-wgcna</requirement>
+    </requirements>
+    <description>
+        Eigengene visualization.
+    </description>
+    <stdio>
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occured, please check your intput carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occured, please check your intput carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+        ## Add tools to PATH
+        export PATH=/opt/R-3.2.5/bin:\$PATH &&
+
+        Rscript '${__tool_directory__}/wgcna_eigengene_visualization_render.R'
+
+            ## 1. input data
+            -e $echo
+            -w $construct_network_workspace
+            -p '$soft_threshold_power'
+            -n $plot_genes
+
+
+            ## 2. output report and report site directory
+		    -o $wgcna_eigengene_visualization
+		    -d $wgcna_eigengene_visualization.files_path
+
+		    ## 3. Rmd templates in the tool directory
+
+                -M '${__tool_directory__}/wgcna_eigengene_visualization.Rmd'
+
+
+
+        ]]>
+    </command>
+    <inputs>
+        <param type="data" name="construct_network_workspace" format="rdata" optional="false"
+               label="R workspace from WGCNA: construct network" />
+        <param type="integer" name="soft_threshold_power" optional="true" label="Soft threshold power"
+               help="Refer to the scale independence plot from 'WGCNA: construct network' and choose an optimal soft threshold power.
+               An optimal power will be calculated automatically if no value is provided." />
+        <param type="integer" name="plot_genes" value="400" min="1" label="Number of genes" optional="false"
+               help="The number of genes that will be used. It is possible to speed up the plotting by providing a subset of
+                    genes. However, the gene dendrogram may ofter look different from dendrogram of all genes." />
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data name="wgcna_eigengene_visualization" format="html" label="WGCNA: eigengene visualization" />
+    </outputs>
+    <citations>
+        <citation type="bibtex">
+            @article{langfelder2008wgcna,
+            title={WGCNA: an R package for weighted correlation network analysis},
+            author={Langfelder, Peter and Horvath, Steve},
+            journal={BMC bioinformatics},
+            volume={9},
+            number={1},
+            pages={559},
+            year={2008},
+            publisher={BioMed Central}
+            }
+        </citation>
+        <citation type="bibtex">
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        </citation>
+        <citation type="bibtex">
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_eigengene_visualization_render.R	Tue Aug 08 12:35:50 2017 -0400
@@ -0,0 +1,109 @@
+##======= Handle arguments from command line ========
+# setup R error handline to go to stderr
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          quit("no", 1, F)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: date type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list=list()
+
+##------- 1. input data ---------------------
+spec_list$ECHO = c('echo', 'e', '1', 'character')
+spec_list$CONSTRUCT_NETWORK_WORKSPACE = c('construct_network_workspace', 'w', '1', 'character')
+spec_list$SOFT_THRESHOLD_POWER = c('soft_threshold_power', 'p', '2', 'double')
+spec_list$PLOT_GENES = c('plot_genes', 'n', '1', 'integer')
+
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML = c('wgcna_eigengene_visualization_html', 'o', '1', 'character')
+spec_list$OUTPUT_DIR = c('wgcna_eigengene_visualization_dir', 'd', '1', 'character')
+
+
+
+##--------3. Rmd templates in the tool directory ----------
+
+spec_list$WGCNA_EIGENGENE_VISUALIZATION_RMD = c('wgcna_eigengene_visualization_rmd', 'M', '1', 'character')
+
+
+
+##------------------------------------------------------------------
+
+spec = t(as.data.frame(spec_list))
+opt = getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$help, opt$expression_file
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(WGCNA)
+library(DT)
+library(htmltools)
+library(ggplot2)
+
+
+#----- 1. create the report directory ------------------------
+system(paste0('mkdir -p ', opt$wgcna_eigengene_visualization_dir))
+
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+
+#----- 01 wgcna_eigengene_visualization.Rmd -----------------------
+readLines(opt$wgcna_eigengene_visualization_rmd) %>%
+  (function(x) {
+    gsub('ECHO', opt$echo, x)
+  }) %>%
+  (function(x) {
+    gsub('CONSTRUCT_NETWORK_WORKSPACE', opt$construct_network_workspace, x)
+  }) %>%
+  (function(x) {
+    gsub('SOFT_THRESHOLD_POWER', opt$soft_threshold_power, x)
+  }) %>%
+  (function(x) {
+    gsub('PLOT_GENES', opt$plot_genes, x)
+  }) %>%
+  (function(x) {
+    gsub('OUTPUT_DIR', opt$wgcna_eigengene_visualization_dir, x)
+  }) %>%
+  (function(x) {
+    fileConn = file('wgcna_eigengene_visualization.Rmd')
+    writeLines(x, con=fileConn)
+    close(fileConn)
+  })
+
+
+#------ 3. render all Rmd files --------
+render('wgcna_eigengene_visualization.Rmd', output_file = opt$wgcna_eigengene_visualization_html)
+
+#-------4. manipulate outputs -----------------------------
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_preprocessing.Rmd	Tue Aug 08 12:35:50 2017 -0400
@@ -0,0 +1,76 @@
+---
+title: 'WGCNA: data preprocessing'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+```
+
+```{r}
+str(opt)
+```
+
+# Import data
+
+Each row represents a gene and each column represents a sample.
+
+```{r}
+expression_data = read.csv('EXPRESSION_DATA', header = TRUE, row.names = 1)
+```
+
+Display the first 100 genes.
+
+```{r}
+datatable(head(expression_data, 100), style="bootstrap", filter = 'top',
+          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+Transpose expression data matrix so that each row represents a sample and each column represents a gene.
+
+```{r}
+expression_data = as.data.frame(t(expression_data))
+```
+
+# Checking data
+
+Checking data for excessive missing values and identification of outlier microarray samples.
+
+```{r}
+gsg = goodSamplesGenes(expression_data, verbose = 3)
+if (!gsg$allOK) {
+  # Optionally, print the gene and sample names that were removed:
+  if (sum(!gsg$goodGenes)>0)
+    printFlush(paste("Removing genes:", paste(names(expression_data)[!gsg$goodGenes], collapse = ", ")));
+  if (sum(!gsg$goodSamples)>0)
+    printFlush(paste("Removing samples:", paste(rownames(expression_data)[!gsg$goodSamples], collapse = ", ")));
+  # Remove the offending genes and samples from the data:
+  expression_data = expression_data[gsg$goodSamples, gsg$goodGenes]
+} else {
+  print('all genes are OK!')
+}
+```
+
+# Clustering samples
+
+If there are any outliers, choose a height cut that will remove the offending sample. Remember this number since you will need this number in further analysis.
+
+```{r fig.align='center'}
+sampleTree = hclust(dist(expression_data), method = "average");
+plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="",
+     cex.axis = 1, cex.main = 1, cex = 0.5)
+```
+
+
+```{r echo=FALSE}
+rm("opt")
+save(list=ls(all.names = TRUE), file='PREPROCESSING_WORKSPACE')
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_preprocessing.xml	Tue Aug 08 12:35:50 2017 -0400
@@ -0,0 +1,96 @@
+<tool id="wgcna_preprocessing" name="WGCNA: preprocessing" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="0.4.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="1.51">r-wgcna</requirement>
+    </requirements>
+    <description>
+        Data clearning and preprocessing.
+    </description>
+    <stdio>
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occured, please check your intput carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occured, please check your intput carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+        ## Add tools to PATH
+        export PATH=/opt/R-3.2.5/bin:\$PATH &&
+
+        Rscript '${__tool_directory__}/wgcna_preprocessing_render.R'
+
+            ## 1. input data
+            -e $echo
+            -E $expression_data
+
+
+            ## 2. output report and report site directory
+		    -o $wgcna_preprocessing
+		    -d $wgcna_preprocessing.files_path
+		    -w $preprocessing_workspace
+
+		    ## 3. Rmd templates sitting in the tool directory
+
+		        ## _site.yml and index.Rmd template files
+                -D '${__tool_directory__}/wgcna_preprocessing.Rmd'
+
+
+
+        ]]>
+    </command>
+    <inputs>
+        <param type="data" name="expression_data" format="csv" optional="false" label="Gene expression data"
+               help="Each row represents a gene and each column represents a sample."/>
+
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data name="wgcna_preprocessing" format="html" label="WGCNA: preprocessing" />
+        <data name="preprocessing_workspace" format="rdata" label="R workspace: WGCNA preprocessing" />
+    </outputs>
+    <citations>
+        <citation type="bibtex">
+            @article{langfelder2008wgcna,
+            title={WGCNA: an R package for weighted correlation network analysis},
+            author={Langfelder, Peter and Horvath, Steve},
+            journal={BMC bioinformatics},
+            volume={9},
+            number={1},
+            pages={559},
+            year={2008},
+            publisher={BioMed Central}
+            }
+        </citation>
+        <citation type="bibtex">
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        </citation>
+        <citation type="bibtex">
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_preprocessing_render.R	Tue Aug 08 12:35:50 2017 -0400
@@ -0,0 +1,102 @@
+##======= Handle arguments from command line ========
+# setup R error handline to go to stderr
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          quit("no", 1, F)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: date type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list=list()
+
+##------- 1. input data ---------------------
+spec_list$ECHO = c('echo', 'e', '1', 'character')
+spec_list$EXPRESSION_DATA = c('expression_data', 'E', '1', 'character')
+
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML = c('wgcna_preprocessing_html', 'o', '1', 'character')
+spec_list$OUTPUT_DIR = c('wgcna_preprocessing_dir', 'd', '1', 'character')
+spec_list$PREPROCESSING_WORKSPACE = c('preprocessing_workspace', 'w', '1', 'character')
+
+##--------3. Rmd templates sitting in the tool directory ----------
+
+spec_list$WGCNA_PREPROCESSING_RMD = c('wgcna_preprocessing_rmd', 'D', '1', 'character')
+
+
+
+##------------------------------------------------------------------
+
+spec = t(as.data.frame(spec_list))
+opt = getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$help, opt$expression_file
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(WGCNA)
+library(DT)
+library(htmltools)
+
+
+#----- 1. create the report directory ------------------------
+system(paste0('mkdir -p ', opt$wgcna_preprocessing_dir))
+
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+
+#----- 01 wgcna_preprocessing.Rmd -----------------------
+readLines(opt$wgcna_preprocessing_rmd) %>%
+  (function(x) {
+    gsub('ECHO', opt$echo, x)
+  }) %>%
+  (function(x) {
+    gsub('EXPRESSION_DATA', opt$expression_data, x)
+  }) %>%
+  (function(x) {
+    gsub('OUTPUT_DIR', opt$wgcna_preprocessing_dir, x)
+  }) %>%
+  (function(x) {
+    gsub('PREPROCESSING_WORKSPACE', opt$preprocessing_workspace, x)
+  }) %>%
+  (function(x) {
+    fileConn = file('wgcna_preprocessing.Rmd')
+    writeLines(x, con=fileConn)
+    close(fileConn)
+  })
+
+
+#------ 3. render all Rmd files --------
+render('wgcna_preprocessing.Rmd', output_file = opt$wgcna_preprocessing_html)
+
+#-------4. manipulate outputs -----------------------------
+
+