changeset 6:2f4df2be0572 draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
author mingchen0919
date Tue, 08 Aug 2017 12:35:11 -0400
parents 0ac073bef19d
children d820be692d74
files 01_evaluation_overview.Rmd 02_fastqc_original_reports.Rmd 1_per_base_quality_scores.Rmd 2_per_base_N_content.Rmd 3_per_sequence_quality_scores.Rmd 4_per_sequence_GC_content.Rmd 5_per_base_sequence_content.Rmd _site.yml fastqc_site.xml fastqc_site_render.R index.Rmd wgcna_construct_network.Rmd wgcna_construct_network.xml wgcna_construct_network_render.R wgcna_eigengene_visualization.Rmd wgcna_eigengene_visualization.xml wgcna_eigengene_visualization_render.R wgcna_preprocessing.Rmd wgcna_preprocessing.xml wgcna_preprocessing_render.R
diffstat 20 files changed, 999 insertions(+), 764 deletions(-) [+]
line wrap: on
line diff
--- a/01_evaluation_overview.Rmd	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
----
-title: "Evaluation Overview"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-```{bash 'copy data from datasets directory to working directory', echo=FALSE}
-# Copy uploaded data to the working directory
-for f in $(echo READS | sed "s/,/ /g")
-do
-    cp $f ./
-done
-```
-
-```{bash 'run fastqc', echo=FALSE}
-# run fastqc and place outputs into the report directory
-for r in $(ls *.dat)
-do
-    fastqc -o REPORT_OUTPUT_DIR $r > /dev/null 2>&1
-done
-```
-
-```{bash 'parse fastqc results', echo=FALSE}
-##==== copy fastqc generated zip files from report output directory to job work directory ==
-cp -r REPORT_OUTPUT_DIR/*zip ./
-
-# create a file to store data file paths
-echo "sample_id,file_path" > PWF_file_paths.txt # Pass, Warning, Fail
-echo "sample_id,file_path" > PBQS_file_paths.txt # Per Base Quality Score
-echo "sample_id,file_path" > PSQS_file_paths.txt # Per Sequence Quality Score
-echo "sample_id,file_path" > PSGC_file_paths.txt # Per Sequence GC Content
-echo "sample_id,file_path" > PBSC_file_paths.txt # Per Base Sequence Content
-echo "sample_id,file_path" > PBNC_file_paths.txt # Per Base N Content
-echo "sample_id,file_path" > SDL_file_paths.txt # Sequence Duplication Level
-echo "sample_id,file_path" > SLD_file_paths.txt # Sequence Length Distribution
-echo "sample_id,file_path" > KMC_file_paths.txt # Kmer Content
-
-for i in $(ls *.zip)
-do
-    BASE=$(echo $i | sed 's/\(.*\)\.zip/\1/g')
-    echo $BASE
-    unzip ${BASE}.zip > /dev/null 2>&1
-    
-    ##====== pass,warning,fail (WSF) =============
-    awk '/^>>/ {print}' "$BASE"/fastqc_data.txt | grep -v 'END_MODULE' | sed 's/>>//' > "$BASE"-PWF.txt
-    echo "${BASE},${BASE}-PWF.txt" >> PWF_file_paths.txt
-
-    ##====== per base quality scores (PBQS) ======
-    awk '/^>>Per base sequence quality/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-PBQS.txt
-    echo "${BASE},${BASE}-PBQS.txt" >> PBQS_file_paths.txt
-
-    ##====== per sequence quality scores (PSQS)
-    awk '/^>>Per sequence quality scores/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-PSQS.txt
-    echo "${BASE},${BASE}-PSQS.txt" >> PSQS_file_paths.txt
-
-    ##====== Per sequence GC content (PSGC)
-    awk '/^>>Per sequence GC content/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-PSGC.txt
-    echo "${BASE},${BASE}-PSGC.txt" >> PSGC_file_paths.txt
-    
-    ##====== Per Base Sequence Content (PBSC)
-    awk '/^>>Per base sequence content/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-PBSC.txt
-    echo "${BASE},${BASE}-PBSC.txt" >> PBSC_file_paths.txt
-    
-    ##====== Per Base N Content (PBNC)
-    awk '/^>>Per base N content/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-PBNC.txt
-    echo "${BASE},${BASE}-PBNC.txt" >> PBNC_file_paths.txt
-    
-    ##====== Sequence Duplication Level (SDL)
-    awk '/^>>Sequence Duplication Levels/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-SDL.txt
-    echo "${BASE},${BASE}-SDL.txt" >> SDL_file_paths.txt
-    
-    ##====== Sequence Length Distribution (SLD)
-    awk '/^>>Sequence Length Distribution/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-SLD.txt
-    echo "${BASE},${BASE}-SLD.txt" >> SLD_file_paths.txt
-    
-    ##====== Kmer Content ============
-    awk '/^>>Kmer Content/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-KMC.txt
-    echo "${BASE},${BASE}-KMC.txt" >> KMC_file_paths.txt
-    
-done
-```
-
-
-## Evaluation Overview
-
-```{r 'overview'}
-PWF_file_paths = read.csv('PWF_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-rm('PWF_df')
-for(i in 1:nrow(PWF_file_paths)) {
-  file_path = PWF_file_paths[i,2]
-  pwf_df = read.csv(file_path,
-                     sep='\t', header=FALSE, stringsAsFactors = FALSE)
-  colnames(pwf_df) = c('item', PWF_file_paths[i,1])
-  if (!exists('PWF_df')) {
-    PWF_df = pwf_df
-  } else {
-    PWF_df = cbind(PWF_df, pwf_df[,2,drop=FALSE])
-  }
-}
-```
-
-
-```{r}
-my_icon = c('ok', 'remove', 'star')
-names(my_icon) = c('pass', 'fail', 'warn')
-evaluate_list = list()
-for (i in colnames(PWF_df)[-1]) {
-  evaluate_list[[i]] = formatter(
-      "span", 
-      style = x ~ style("background-color" = ifelse(x =='pass', '#9CD027', ifelse(x == 'fail', '#CC0000', '#FF4E00')), 
-                        "color" = "white",
-                        "width" = "50px",
-                        "float" = "left",
-                        "padding-right" = "5px")
-    )
-}
-
-formattable(PWF_df, evaluate_list)
-```
\ No newline at end of file
--- a/02_fastqc_original_reports.Rmd	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
----
-title: "FastQC original reports"
-output: html_document
----
-
-```{r 'FastQC original reports', include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-
-Below are links to ***Fastqc*** original html reports.
-
-```{r 'html report links'}
-html_report_list = list()
-html_files = list.files('REPORT_OUTPUT_DIR', pattern = '.*html')
-for (i in html_files) {
-  html_report_list[[i]] = tags$li(tags$a(href=i, i))
-}
-tags$ul(html_report_list)
-```
\ No newline at end of file
--- a/1_per_base_quality_scores.Rmd	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
----
-title: "Per Base Quality Scores"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-
-## Per Base Quality Scores
-
-```{r}
-PBQS_df = data.frame()
-PBQS_file_paths = read.csv('PBQS_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PBQS_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PBQS_file_paths[i,2])
-  file_path = PBQS_file_paths[i,2]
-  pbqs_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
-    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
-           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
-  (function (df) {
-    df1 = select(df, -Base2)
-    df2 = select(df, -Base1) %>% filter(Base2 != '')
-    colnames(df1) = c(colnames(df1)[1:7], 'Base')
-    colnames(df2) = c(colnames(df2)[1:7], 'Base')
-    res = rbind(df1, df2) %>% arrange(Base)
-    return(res)
-  })
-  pbqs_df$sample_id = rep(PBQS_file_paths[i,1], nrow(pbqs_df))
-  PBQS_df = rbind(PBQS_df, pbqs_df)
-}
-```
-
-
-```{r}
-# datatable(PBQS_df)
-max_phred = max(PBQS_df$Mean) + 10
-hchart(PBQS_df, "line", hcaes(x = Base, y = Mean, group = sample_id)) %>%
-  hc_title(
-    text = "Per Base Quality Score"
-  ) %>%
-  hc_yAxis(
-    title = list(text = "Mean Base Quality Score"),
-    min = 0,
-    max = max_phred,
-    plotLines = list(
-      list(label = list(text = "Phred Score = 27"),
-           width = 2,
-           dashStyle = "dash",
-           color = "green",
-           value = 27),
-      list(label = list(text = "Phred Score = 20"),
-           width = 2,
-           color = "red",
-           value = 20)
-    )
-  ) %>% 
-  hc_exporting(enabled = TRUE)
-```
--- a/2_per_base_N_content.Rmd	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
----
-title: "Per Base N Content"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Base N Content
-
-```{r}
-PBNC_df = data.frame()
-PBNC_file_paths = read.csv('PBNC_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PBNC_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PBNC_file_paths[i,2])
-  file_path = PBNC_file_paths[i,2]
-  pbnc_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
-    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
-           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
-  (function (df) {
-    df1 = select(df, -Base2)
-    df2 = select(df, -Base1) %>% filter(Base2 != '')
-    colnames(df1) = c(colnames(df1)[1:2], 'Base')
-    colnames(df2) = c(colnames(df2)[1:2], 'Base')
-    res = rbind(df1, df2) %>% arrange(Base)
-    return(res)
-  })
-  pbnc_df$sample_id = rep(PBNC_file_paths[i,1], nrow(pbnc_df))
-  PBNC_df = rbind(PBNC_df, pbnc_df)
-}
-```
-
-
-```{r}
-PBNC_df$N.Count = PBNC_df$N.Count * 100
-max_phred = max(PBNC_df$N.Count) + 5
-hchart(PBNC_df, "line", hcaes(x = as.character(Base), y = N.Count, group = sample_id)) %>%
-  hc_title(
-    text = "Per Base N Content"
-  ) %>%
-  hc_xAxis(
-    title = list(text = "Base Position")
-  ) %>%
-  hc_yAxis(
-    title = list(text = "N %"),
-    plotLines = list(
-      list(label = list(text = "N = 5%"),
-           width = 2,
-           dashStyle = "dash",
-           color = "red",
-           value = 5)
-    )
-  ) %>% 
-  hc_exporting(enabled = TRUE)
-```
--- a/3_per_sequence_quality_scores.Rmd	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
----
-title: "Per Sequence Quality Scores"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Sequence Quality Scores
-
-```{r}
-PSQS_df = data.frame()
-PSQS_file_paths = read.csv('PSQS_file_paths.txt', 
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PSQS_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PSQS_file_paths[i,2])
-  file_path = PSQS_file_paths[i,2]
-  psqs_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) 
-  psqs_df$sample_id = rep(PSQS_file_paths[i,1], nrow(psqs_df))
-  PSQS_df = rbind(PSQS_df, psqs_df)
-}
-```
-
-
-```{r}
-max_phred = max(PSQS_df$X.Quality) + 5
-hchart(PSQS_df, "line", hcaes(x = X.Quality, y = Count, group = sample_id)) %>%
-  hc_title(
-    text = "Per Sequence Quality Score"
-  ) %>%
-  hc_xAxis(
-    title = list(text = "Mean Sequence Quality Score"),
-    min = 0,
-    max = max_phred,
-    plotLines = list(
-      list(label = list(text = "Phred Score = 27"),
-           width = 2,
-           dashStyle = "dash",
-           color = "green",
-           value = 27),
-      list(label = list(text = "Phred Score = 20"),
-           width = 2,
-           color = "red",
-           value = 20)
-    )
-  ) %>% 
-  hc_exporting(enabled = TRUE)
-```
--- a/4_per_sequence_GC_content.Rmd	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
----
-title: "Per Sequence GC Content"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Sequence GC Content
-
-
-```{r}
-PSGC_df = data.frame()
-PSGC_file_paths = read.csv('PSGC_file_paths.txt', 
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PSGC_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PSGC_file_paths[i,2])
-  file_path = PSGC_file_paths[i,2]
-  psgc_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) 
-  psgc_df$sample_id = rep(PSGC_file_paths[i,1], nrow(psgc_df))
-  PSGC_df = rbind(PSGC_df, psgc_df)
-}
-```
-
-
-```{r}
-max_phred = max(PSGC_df$Count) + 5
-hchart(PSGC_df, "line", hcaes(x = X.GC.Content, y = Count, group = sample_id)) %>%
-  hc_title(
-    text = "Per Sequence GC Content"
-  ) %>%
-  hc_xAxis(
-    title = list(text = "% GC")
-  ) %>%
-  hc_exporting(enabled = TRUE)
-```
--- a/5_per_base_sequence_content.Rmd	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
----
-title: "Per Base Sequence Content"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Base Sequence Content
-
-```{r}
-PBSC_df = data.frame()
-PBSC_file_paths = read.csv('PBSC_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PBSC_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PBSC_file_paths[i,2])
-  file_path = PBSC_file_paths[i,2]
-  pbsc_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
-    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
-           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
-  (function (df) {
-    df1 = select(df, -Base2)
-    df2 = select(df, -Base1) %>% filter(Base2 != '')
-    colnames(df1) = c(colnames(df1)[1:5], 'Base')
-    colnames(df2) = c(colnames(df2)[1:5], 'Base')
-    res = rbind(df1, df2) %>% arrange(Base)
-    return(res)
-  })
-  pbsc_df$sample_id = rep(PBSC_file_paths[i,1], nrow(pbsc_df))
-  PBSC_df = rbind(PBSC_df, pbsc_df)
-}
-```
-
-
-```{r out.width="100%"}
-PBSC_df_2 = select(PBSC_df, -X.Base) %>%
-  melt(id = c('Base', 'sample_id'), value.name = 'base_percentage')
-p = ggplot(data = PBSC_df_2, aes(x = Base, y = base_percentage, group = variable, color = variable)) +
-  geom_line() +
-  facet_wrap(~ sample_id)
-ggplotly(p)
-```
-
--- a/_site.yml	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-name: "FastQC Website"
-output_dir: "my_site"
-navbar:
-    title: "FastQC"
-    type: inverse
-    left:
-        - text: "Home"
-          icon: fa-home
-          href: index.html
-        - text: "Evaluation Overview"
-          href: 01_evaluation_overview.html
-        - text: "Evaluation Items"
-          menu:
-            - text: "Per Base Quality Scores"
-              href: 1_per_base_quality_scores.html
-            - text: "Per Base N Content"
-              href: 2_per_base_N_content.html
-            - text: "Per Sequence Quality Scores"
-              href: 3_per_sequence_quality_scores.html
-            - text: "Per Sequence GC Content"
-              href: 4_per_sequence_GC_content.html
-            - text: "Per Base Sequence Content"
-              href: 5_per_base_sequence_content.html
-        - text: "Original FastQC Reports"
-          href: 02_fastqc_original_reports.html
-output:
-  html_document:
-    theme: cosmo
-    highlight: textmate
\ No newline at end of file
--- a/fastqc_site.xml	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-<tool id="fastqc_site" name="Fastqc Site" version="1.0.0">
-    <requirements>
-        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
-        <requirement type="package" version="1.20.0">r-getopt</requirement>
-        <requirement type="package" version="1.2">r-rmarkdown</requirement>
-        <requirement type="package" version="1.8.4">r-plyr</requirement>
-        <requirement type="package" version="1.1.0">r-stringr</requirement>
-        <requirement type="package" version="0.5.0">r-highcharter</requirement>
-        <requirement type="package" version="0.2">r-dt</requirement>
-        <requirement type="package" version="1.4.2">r-reshape2</requirement>
-        <requirement type="package" version="4.5.6">r-plotly</requirement>
-        <requirement type="package" version="0.2.0.1">r-formattable</requirement>
-        <requirement type="package" version="0.3.5">r-htmltools</requirement>
-        <requirement type="package" version="0.11.5">fastqc</requirement>
-    </requirements>
-    <description>
-        Implements FastQC analysis and display results in R Markdown website.
-    </description>
-    <stdio>
-        <regex match="Execution halted"
-               source="both"
-               level="fatal"
-               description="Execution halted." />
-        <regex match="Error in"
-               source="both"
-               level="fatal"
-               description="An undefined error occured, please check your intput carefully and contact your administrator." />
-        <regex match="Fatal error"
-               source="both"
-               level="fatal"
-               description="An undefined error occured, please check your intput carefully and contact your administrator." />
-    </stdio>
-    <command>
-        <![CDATA[
-
-        Rscript '${__tool_directory__}/fastqc_site_render.R'
-
-            ## 1. input data
-            -r $reads
-            -e $echo
-
-            ## 2. output report and report site directory
-		    -o $fastqc_site
-		    -d $fastqc_site.files_path
-
-		    ## 3. Rmd templates sitting in the tool directory
-
-		        ## _site.yml and index.Rmd template files
-                -s '${__tool_directory__}/_site.yml'
-                -i '${__tool_directory__}/index.Rmd'
-
-                ## other Rmd body template files
-		        -p  '${__tool_directory__}/01_evaluation_overview.Rmd'
-		        -a  '${__tool_directory__}/02_fastqc_original_reports.Rmd'
-		        -b  '${__tool_directory__}/1_per_base_quality_scores.Rmd'
-		        -c  '${__tool_directory__}/2_per_base_N_content.Rmd'
-		        -f  '${__tool_directory__}/3_per_sequence_quality_scores.Rmd'
-		        -g  '${__tool_directory__}/4_per_sequence_GC_content.Rmd'
-		        -h  '${__tool_directory__}/5_per_base_sequence_content.Rmd'
-
-        ]]>
-    </command>
-    <inputs>
-        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" multiple="true" name="reads" type="data" label="Short reads data from history" />
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
-    </inputs>
-    <outputs>
-        <data format="html" name="fastqc_site" label="fastqc site" />
-    </outputs>
-    <citations>
-        <citation type="bibtex">
-            @misc{bioinformatics2014fastqc,
-            title={FastQC},
-            author={Bioinformatics, Babraham},
-            year={2014}
-            }
-        </citation>
-        <citation type="bibtex">
-            @article{allaire2016rmarkdown,
-            title={rmarkdown: Dynamic Documents for R, 2016},
-            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
-            journal={R package version 0.9},
-            volume={6},
-            year={2016}
-            }
-        </citation>
-        <citation type="bibtex">
-            @book{xie2015dynamic,
-            title={Dynamic Documents with R and knitr},
-            author={Xie, Yihui},
-            volume={29},
-            year={2015},
-            publisher={CRC Press}
-            }
-        </citation>
-        <citation type="bibtex">
-            @misc{plotly2017,
-            title = {plotly: Create Interactive Web Graphics via 'plotly.js'},
-            author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy},
-            year = {2017},
-            note = {R package version 4.6.0},
-            url = {https://CRAN.R-project.org/package=plotly},
-            }
-        </citation>
-        <citation type="bibtex">
-            @misc{highcharter2017,
-            title = {highcharter: A Wrapper for the 'Highcharts' Library},
-            author = {Joshua Kunst},
-            year = {2017},
-            note = {R package version 0.5.0},
-            url = {https://CRAN.R-project.org/package=highcharter},
-            }
-        </citation>
-        <citation type="bibtex">
-            @misc{formattable2016,
-            title = {formattable: Create 'Formattable' Data Structures},
-            author = {Kun Ren and Kenton Russell},
-            year = {2016},
-            note = {R package version 0.2.0.1},
-            url = {https://CRAN.R-project.org/package=formattable},
-            }
-        </citation>
-    </citations>
-</tool>
\ No newline at end of file
--- a/fastqc_site_render.R	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,195 +0,0 @@
-##======= Handle arguments from command line ========
-# setup R error handline to go to stderr
-options(show.error.messages=FALSE,
-        error=function(){
-          cat(geterrmessage(), file=stderr())
-          quit("no", 1, F)
-        })
-
-# we need that to not crash galaxy with an UTF8 error on German LC settings.
-loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
-
-# suppress warning
-options(warn = -1)
-
-options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
-args = commandArgs(trailingOnly=TRUE)
-
-suppressPackageStartupMessages({
-  library(getopt)
-  library(tools)
-})
-
-# column 1: the long flag name
-# column 2: the short flag alias. A SINGLE character string
-# column 3: argument mask
-#           0: no argument
-#           1: argument required
-#           2: argument is optional
-# column 4: date type to which the flag's argument shall be cast.
-#           possible values: logical, integer, double, complex, character.
-spec_list=list()
-
-##------- 1. input data ---------------------
-spec_list$READS = c('reads', 'r', '1', 'character')
-spec_list$ECHO = c('echo', 'e', '1', 'character')
-
-##--------2. output report and report site directory --------------
-spec_list$FASTQC_SITE = c('fastqc_site', 'o', '1', 'character')
-spec_list$FASTQC_SITE_DIR = c('fastqc_site_dir', 'd', '1', 'character')
-
-##--------3. Rmd templates sitting in the tool directory ----------
-
-    ## _site.yml and index.Rmd files
-    spec_list$SITE_YML = c('site_yml', 's', 1, 'character')
-    spec_list$INDEX_Rmd = c('index_rmd', 'i', 1, 'character')
-    
-    ## other Rmd body template files
-    spec_list$x01 = c('x01_evaluation_overview', 'p', '1', 'character')
-    spec_list$x02 = c('x02_fastqc_original_reports', 'a', '1', 'character')
-    spec_list$x1 = c('x1_per_base_quality_scores', 'b', '1', 'character')
-    spec_list$x2 = c('x2_per_base_N_content', 'c', '1', 'character')
-    spec_list$x3 = c('x3_per_sequence_quality_scores', 'f', '1', 'character')
-    spec_list$x4 = c('x4_per_sequence_GC_content', 'g', '1', 'character')
-    spec_list$x5 = c('x5_per_base_sequence_content', 'h', '1', 'character')
-
-##------------------------------------------------------------------
-
-spec = t(as.data.frame(spec_list))
-opt = getopt(spec)
-# arguments are accessed by long flag name (the first column in the spec matrix)
-#                        NOT by element name in the spec_list
-# example: opt$help, opt$expression_file
-##====== End of arguments handling ==========
-
-#------ Load libraries ---------
-library(rmarkdown)
-library(plyr)
-library(stringr)
-library(dplyr)
-library(highcharter)
-library(DT)
-library(reshape2)
-library(plotly)
-library(formattable)
-library(htmltools)
-
-
-#----- 1. create the report directory ------------------------
-paste0('mkdir -p ', opt$fastqc_site_dir) %>%
-  system()
-
-#----- 2. generate Rmd files with Rmd templates --------------
-#   a. templates without placeholder variables:
-#         copy templates from tool directory to the working directory.
-#   b. templates with placeholder variables:
-#         substitute variables with user input values and place them in the working directory.
-
-
-    #----- Copy index.Rmd and _site.yml files to job working direcotry -----
-    file.copy(opt$index_rmd, 'index.Rmd', recursive=TRUE)
-    file.copy(opt$site_yml, '_site.yml', recursive=TRUE)
-    #---------------------------------------------------------
-    
-    #----- 01_evaluation_overview.Rmd -----------------------
-    readLines(opt$x01_evaluation_overview) %>%
-      (function(x) {
-        gsub('ECHO', opt$echo, x)
-      }) %>%
-      (function(x) {
-        gsub('READS', opt$reads, x)
-      }) %>%
-      (function(x) {
-        gsub('REPORT_OUTPUT_DIR', opt$fastqc_site_dir, x)
-      }) %>%
-      (function(x) {
-        fileConn = file('01_evaluation_overview.Rmd')
-        writeLines(x, con=fileConn)
-        close(fileConn)
-      })
-    
-    #----- 1_per_base_quality_scores.Rmd --------------------
-    readLines(opt$x1_per_base_quality_scores) %>%
-      (function(x) {
-        gsub('ECHO', opt$echo, x)
-      }) %>%
-      (function(x) {
-        fileConn = file('1_per_base_quality_scores.Rmd')
-        writeLines(x, con=fileConn)
-        close(fileConn)
-      })
-    
-    #----- 2_per_base_N_content.Rmd -------------------------
-    readLines(opt$x2_per_base_N_content) %>%
-      (function(x) {
-        gsub('ECHO', opt$echo, x)
-      }) %>%
-      (function(x) {
-        fileConn = file('2_per_base_N_content.Rmd')
-        writeLines(x, con=fileConn)
-        close(fileConn)
-      })
-    
-    #----- 3_per_sequence_quality_scores.Rmd ----------------
-    readLines(opt$x3_per_sequence_quality_scores) %>%
-      (function(x) {
-        gsub('ECHO', opt$echo, x)
-      }) %>%
-      (function(x) {
-        fileConn = file('3_per_sequence_quality_scores.Rmd')
-        writeLines(x, con=fileConn)
-        close(fileConn)
-      })
-    
-    
-    #----- 4_per_sequence_GC_content.Rmd --------------------
-    readLines(opt$x4_per_sequence_GC_content) %>%
-      (function(x) {
-        gsub('ECHO', opt$echo, x)
-      }) %>%
-      (function(x) {
-        fileConn = file('4_per_sequence_GC_content.Rmd')
-        writeLines(x, con=fileConn)
-        close(fileConn)
-      })
-    
-    
-    #----- 5_per_base_sequence_content.Rmd ------------------
-    readLines(opt$x5_per_base_sequence_content) %>%
-      (function(x) {
-        gsub('ECHO', opt$echo, x)
-      }) %>%
-      (function(x) {
-        fileConn = file('5_per_base_sequence_content.Rmd')
-        writeLines(x, con=fileConn)
-        close(fileConn)
-      })
-
-    #----- 02_fastqc_original_reports.Rmd -------------------
-    readLines(opt$x02_fastqc_original_reports) %>%
-      (function(x) {
-        gsub('ECHO', opt$echo, x)
-      }) %>%
-      (function(x) {
-        gsub('REPORT_OUTPUT_DIR', opt$fastqc_site_dir, x)
-      }) %>%
-      (function(x) {
-        fileConn = file('02_fastqc_original_reports.Rmd')
-        writeLines(x, con=fileConn)
-        close(fileConn)
-      })
-
-
-
-#------ 3. render all Rmd files with render_site() --------
-render_site()    
-
-
-#-------4. manipulate outputs -----------------------------
-#   a. copy index.html to the report output path
-#   b. copy all files in 'my_site' to the report output directory
-file.copy('my_site/index.html', opt$fastqc_site, recursive=TRUE)
-paste0('cp -r my_site/* ', opt$fastqc_site_dir) %>%
-  system()
-
-
--- a/index.Rmd	Tue Aug 08 11:45:41 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
----
-title: "FastQC Report"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-
-
-## References
-
-* Andrews, Simon. "FastQC: a quality control tool for high throughput sequence data." (2010): 175-176.
-* Goecks, Jeremy, Anton Nekrutenko, and James Taylor. "Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences." Genome biology 11.8 (2010): R86.
-* Afgan, Enis, et al. "The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2016 update." Nucleic acids research (2016): gkw343.
-* Highcharts. https://www.highcharts.com/. (access by May 26, 2017).
-* R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.
-* Joshua Kunst (2017). highcharter: A Wrapper for the 'Highcharts' Library. R package version 0.5.0. https://CRAN.R-project.org/package=highcharter
-* Carson Sievert, Chris Parmer, Toby Hocking, Scott Chamberlain, Karthik Ram, Marianne Corvellec and Pedro Despouy (2017). plotly: Create Interactive Web Graphics via 'plotly.js'. R package version 4.6.0. https://CRAN.R-project.org/package=plotly
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_construct_network.Rmd	Tue Aug 08 12:35:11 2017 -0400
@@ -0,0 +1,178 @@
+---
+title: 'WGCNA: construct network'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+```
+
+# Import workspace 
+
+This step imports the workspace saved by the **WGCNA: preprocessing** step.
+
+```{r}
+fcp = file.copy("PREPROCESSING_WORKSPACE", "deseq.RData")
+load("deseq.RData")
+```
+
+
+# Processing outliers {.tabset}
+
+## Before removing outliers
+
+```{r}
+plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
+     cex.axis = 1, cex.main = 1, cex = 0.5)
+if(!is.na(HEIGHT_CUT)) {
+  # plot a line to show the cut
+  abline(h = HEIGHT_CUT, col = "red")
+  # determine cluster under the line
+  clust = cutreeStatic(sampleTree, cutHeight = HEIGHT_CUT, minSize = 10)
+  keepSamples = (clust==1)
+  expression_data = expression_data[keepSamples, ]
+}
+```
+
+## After removing outliers
+
+```{r}
+sampleTree = hclust(dist(expression_data), method = "average");
+plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="",
+     cex.axis = 1, cex.main = 1, cex = 0.5)
+```
+
+
+# Trait data {.tabset}
+
+If trait data is provided, the first 100 rows of the data are displayed here. A plot consisting of the sample cluster dendrogram and a trait heatmap is also generated.
+
+## Trait data table
+
+```{r}
+trait_data = data.frame()
+if ("TRAIT_DATA" != 'None') {
+  trait_data = read.csv("TRAIT_DATA", header = TRUE, row.names = 1)
+  # align trait rows to the expression samples; drop = FALSE keeps a data frame even for a single trait column
+  sample_names = rownames(expression_data)
+  trait_rows = match(sample_names, rownames(trait_data))
+  trait_data = trait_data[trait_rows, , drop = FALSE]
+  datatable(head(trait_data, 100), style="bootstrap", filter = 'top',
+          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+}
+```
+
+## Dendrogram and heatmap
+
+```{r fig.align='center', fig.width=8, fig.height=9}
+if (nrow(trait_data) != 0) {
+  traitColors = numbers2colors(trait_data, signed = FALSE)
+  plotDendroAndColors(sampleTree, traitColors,
+                      groupLabels = names(trait_data),
+                      main = "Sample dendrogram and trait heatmap",
+                      cex.dendroLabels = 0.5)
+}
+```
+
+
+# The thresholding power
+
+```{r}
+powers = c(1:10, seq(12, 20, 2))
+soft_threshold = pickSoftThreshold(expression_data, powerVector = powers, verbose = 5)
+```
+
+```{r fig.align='center'}
+par(mfrow=c(1,2))
+plot(soft_threshold$fitIndices[,1], -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2],
+     xlab="Soft Threshold (power)",
+     ylab="Scale Free Topology Model Fit,signed R^2",type="n",
+     main = paste("Scale independence"),
+     cex.lab = 0.5);
+text(soft_threshold$fitIndices[,1], -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2],
+     labels=powers,cex=0.5,col="red");
+
+# Pick the smallest power whose signed R^2 exceeds the cutoff; when none does,
+# fall back to the largest tested power so `r2_cutoff_new` is always defined
+# (it used to stay unset in that case and the text() call below failed).
+y = -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2]
+r2_cutoff = 0.9
+passing = which(y > r2_cutoff)
+# which() skips NA fits, so a missing R^2 no longer aborts the selection
+chosen = if (length(passing) > 0) passing[1] else length(powers)
+soft_threshold_power = soft_threshold$fitIndices[,1][chosen]
+r2_cutoff_new = y[chosen]
+# mark the cutoff and the chosen power on the scale-independence panel
+abline(h=r2_cutoff, col="red")
+abline(v=soft_threshold_power, col="blue")
+text(soft_threshold_power+1, r2_cutoff-0.1, 
+     paste0('R^2 cutoff = ', round(r2_cutoff_new,2)),
+     cex = 0.5, col = "red")
+
+plot(soft_threshold$fitIndices[,1], soft_threshold$fitIndices[,5],
+     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
+     main = paste("Mean connectivity"),
+     cex.lab = 0.5)
+text(soft_threshold$fitIndices[,1], soft_threshold$fitIndices[,5], labels=powers, cex=0.5,col="red")
+par(mfrow=c(1,1))
+```
+
+
+# Construct network 
+
+The gene network is constructed based on **soft threshold power = `r soft_threshold_power`**
+
+```{r}
+gene_network = blockwiseModules(expression_data, power = soft_threshold_power,
+                                TOMType = "unsigned", minModuleSize = 30,
+                                reassignThreshold = 0, mergeCutHeight = 0.25,
+                                numericLabels = TRUE, pamRespectsDendro = FALSE,
+                                verbose = 3)
+```
+
+
+# Gene modules {.tabset}
+
+## Identify gene modules 
+
+```{r}
+# Tabulate genes per numeric module label.
+modules = table(gene_network$colors)
+# Label 0 (genes outside all modules) is absent when every gene is assigned,
+# so count and size the real modules by label rather than by position.
+assigned = modules[names(modules) != "0"]
+n_modules = length(assigned)
+module_size_upper = assigned[1]
+module_size_lower = assigned[n_modules]
+
+# Use the labels actually present; c(0, 1:n_modules) breaks when n_modules is 0.
+module_table = data.frame(model_label = as.integer(names(modules)),
+                          gene_size = as.vector(modules))
+datatable(t(module_table))
+```
+
+The results above indicate that there are **`r n_modules` gene modules**, labeled 1 through `r n_modules` in order of descending size. The largest module has **`r module_size_upper` genes**, and the smallest module has **`r module_size_lower` genes**. The label 0 is reserved for genes outside of all modules. 
+
+
+## Dendrogram and module plot
+
+```{r}
+# Convert labels to colors for plotting
+module_colors = labels2colors(gene_network$colors)
+# Plot the dendrogram and the module colors underneath
+plotDendroAndColors(gene_network$dendrograms[[1]], module_colors[gene_network$blockGenes[[1]]],
+                    "Module colors",
+                    dendroLabels = FALSE, hang = 0.03,
+                    addGuide = TRUE, guideHang = 0.05)
+```
+
+
+```{r echo=FALSE}
+# save workspace
+rm("opt")
+save(list=ls(all.names = TRUE), file='CONSTRUCT_NETWORK_WORKSPACE')
+```
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_construct_network.xml	Tue Aug 08 12:35:11 2017 -0400
@@ -0,0 +1,105 @@
+<tool id="wgcna_construct_network" name="WGCNA: construct network" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="0.4.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="1.51">r-wgcna</requirement>
+    </requirements>
+    <description>
+        Construct gene network.
+    </description>
+    <stdio>
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+
+        Rscript '${__tool_directory__}/wgcna_construct_network_render.R'
+
+            ## 1. input data
+            -e $echo
+            -w $preprocessing_workspace
+            -h '$height_cut'
+            -t $trait_data
+
+
+
+            ## 2. output report and report site directory
+		    -o $wgcna_construct_network
+		    -d $wgcna_construct_network.files_path
+		    -W $construct_network_workspace
+
+
+		    ## 3. Rmd templates in the tool directory
+
+		        ## _site.yml and index.Rmd template files
+                -M '${__tool_directory__}/wgcna_construct_network.Rmd'
+
+
+
+        ]]>
+    </command>
+    <inputs>
+        <param type="data" name="preprocessing_workspace" format="rdata" optional="false"
+               label="R workspace from WGCNA: preprocessing" />
+        <param type="float" name="height_cut" optional="true" label="Height"
+               help="Refer to the sample clustering plot from WGCNA: preprocessing and choose a height cut that will
+                    remove outliers. If there are no outliers, leave this field blank." />
+        <param type="data" name="trait_data" format="csv" optional="true"
+               label="Trait data"
+               help="If trait data is provided, a plot consisting of sample clustering and trait heatmap will
+                    be generated. This field is optional. "/>
+
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data name="wgcna_construct_network" format="html" label="WGCNA: construct_network" />
+        <data name="construct_network_workspace" format="rdata" label="R workspace: WGCNA construct_network" />
+    </outputs>
+    <citations>
+        <citation type="bibtex">
+            @article{langfelder2008wgcna,
+            title={WGCNA: an R package for weighted correlation network analysis},
+            author={Langfelder, Peter and Horvath, Steve},
+            journal={BMC bioinformatics},
+            volume={9},
+            number={1},
+            pages={559},
+            year={2008},
+            publisher={BioMed Central}
+            }
+        </citation>
+        <citation type="bibtex">
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        </citation>
+        <citation type="bibtex">
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_construct_network_render.R	Tue Aug 08 12:35:11 2017 -0400
@@ -0,0 +1,112 @@
+##======= Handle arguments from command line ========
+# setup R error handling: print the message to stderr and exit with status 1
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          quit("no", 1, FALSE)  # save = "no", status = 1, runLast = FALSE (F is reassignable, FALSE is not)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: data type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list=list()
+
+##------- 1. input data ---------------------
+spec_list$ECHO = c('echo', 'e', '1', 'character')
+spec_list$PREPROCESSING_WORKSPACE = c('preprocessing_workspace', 'w', '1', 'character')
+spec_list$HEIGHT_CUT = c('height_cut', 'h', '2', 'double')
+spec_list$TRAIT_DATA = c('trait_data', 't', '2', 'character')
+
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML = c('wgcna_construct_network_html', 'o', '1', 'character')
+spec_list$OUTPUT_DIR = c('wgcna_construct_network_dir', 'd', '1', 'character')
+spec_list$CONSTRUCT_NETWORK_WORKSPACE = c('construct_network_workspace', 'W', '1', 'character')
+
+
+##--------3. Rmd templates in the tool directory ----------
+# element name fixed (was a copy-paste of the preprocessing script); only the long flag name is used for lookup
+spec_list$WGCNA_CONSTRUCT_NETWORK_RMD = c('wgcna_construct_network_rmd', 'M', '1', 'character')
+
+
+
+##------------------------------------------------------------------
+
+spec = t(as.data.frame(spec_list))  # one row per flag, four columns as described above
+opt = getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$echo, opt$height_cut
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(WGCNA)
+library(DT)
+library(htmltools)
+library(ggplot2)
+
+
+#----- 1. create the report directory (like `mkdir -p`, but without a shell so odd characters in the path are safe) ----
+dir.create(opt$wgcna_construct_network_dir, recursive = TRUE, showWarnings = FALSE)
+
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+
+#----- 01 wgcna_construct_network.Rmd: replace each placeholder token with its option value ------
+readLines(opt$wgcna_construct_network_rmd) %>%  # NOTE(review): %>% is never loaded explicitly - presumably re-exported by an attached package, confirm
+  (function(x) {
+    gsub('ECHO', opt$echo, x)
+  }) %>%
+  (function(x) {
+    gsub('PREPROCESSING_WORKSPACE', opt$preprocessing_workspace, x)
+  }) %>%
+  (function(x) {
+    gsub('HEIGHT_CUT', opt$height_cut, x)  # optional flag; NOTE(review): confirm what gets substituted when no value is given
+  }) %>%
+  (function(x) {
+    gsub('TRAIT_DATA', opt$trait_data, x)  # optional flag, same caveat as HEIGHT_CUT
+  }) %>%
+  (function(x) {
+    gsub('OUTPUT_DIR', opt$wgcna_construct_network_dir, x)
+  }) %>%
+  (function(x) {
+    gsub('CONSTRUCT_NETWORK_WORKSPACE', opt$construct_network_workspace, x)
+  }) %>%
+  (function(x) {
+    fileConn = file('wgcna_construct_network.Rmd')  # filled-in copy goes to the working directory
+    writeLines(x, con=fileConn)
+    close(fileConn)
+  })
+
+
+#------ 3. render the filled-in Rmd to the HTML output path --------
+render('wgcna_construct_network.Rmd', output_file = opt$wgcna_construct_network_html)
+
+#-------4. manipulate outputs -----------------------------
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_eigengene_visualization.Rmd	Tue Aug 08 12:35:11 2017 -0400
@@ -0,0 +1,121 @@
+---
+title: 'WGCNA: eigengene visualization'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+```
+
+# Import workspace 
+
+This step imports workspace from the **WGCNA: construct network** step.
+
+```{r}
+fcp = file.copy("CONSTRUCT_NETWORK_WORKSPACE", "deseq.RData")  # stage the upstream workspace locally; NOTE(review): "deseq" looks like a copy-paste leftover and fcp is never checked
+load("deseq.RData")  # restore the objects saved by the construct-network step
+```
+
+
+# Gene modules {.tabset}
+
+```{r}
+if(!is.na(SOFT_THRESHOLD_POWER)) soft_threshold_power = SOFT_THRESHOLD_POWER  # a user-supplied power overrides the existing value (presumably restored from the workspace)
+```
+
+## Identify gene modules
+
+The gene network is constructed based on **soft threshold power = `r soft_threshold_power`**
+
+```{r}
+gene_network = blockwiseModules(expression_data, power = soft_threshold_power,  # network construction and module detection in one call
+                                TOMType = "unsigned", minModuleSize = 30,       # a module needs at least 30 genes
+                                reassignThreshold = 0, mergeCutHeight = 0.25,
+                                numericLabels = TRUE, pamRespectsDendro = FALSE, # numeric module labels (0 = unassigned) instead of color names
+                                verbose = 3)
+```
+
+
+```{r}
+# Tabulate genes per module label. Label 0 (unassigned genes) may be absent, so it is filtered by name, not position.
+modules = table(gene_network$colors)
+module_sizes = modules[names(modules) != "0"]
+n_modules = length(module_sizes)  # real modules only; never counts label 0
+module_size_upper = if (n_modules > 0) max(module_sizes) else 0
+module_size_lower = if (n_modules > 0) min(module_sizes) else 0
+module_table = data.frame(module_label = as.integer(names(modules)), gene_size = as.vector(modules))
+datatable(t(module_table))
+```
+
+The results above indicate that there are **`r n_modules` gene modules**, labeled 1 through `r n_modules` in order of descending size. The largest module has **`r module_size_upper` genes**, and the smallest module has **`r module_size_lower` genes**. The label 0 is reserved for genes outside of all modules.
+
+
+## Dendrogram and module plot
+
+```{r}
+# Map the numeric module labels to color names for plotting
+module_colors = labels2colors(gene_network$colors)
+# Plot the gene dendrogram with the module color bar underneath
+plotDendroAndColors(gene_network$dendrograms[[1]], module_colors[gene_network$blockGenes[[1]]],  # NOTE(review): only block 1 is drawn - confirm all genes fit in one block
+                    "Module colors",
+                    dendroLabels = FALSE, hang = 0.03,
+                    addGuide = TRUE, guideHang = 0.05)
+```
+
+
+# Gene module correlation
+
+We can calculate eigengenes and use them as representative profiles to quantify similarity of found gene modules.
+
+```{r}
+n_genes = ncol(expression_data)    # columns are genes
+n_samples = nrow(expression_data)  # rows are samples
+```
+
+```{r}
+# Dissimilarity = 1 - TOM similarity, computed over all genes.
+diss_tom = 1-TOMsimilarityFromExpr(expression_data, power = soft_threshold_power)
+set.seed(123)  # fixed seed so the same random subset is drawn on every run
+# Cap the request at n_genes: sample() errors when asked for more items than exist.
+select_genes = sample(n_genes, size = min(PLOT_GENES, n_genes))
+select_diss_tom = diss_tom[select_genes, select_genes]
+# calculate gene tree on selected genes
+select_gene_tree = hclust(as.dist(select_diss_tom), method = 'average')
+select_module_colors = module_colors[select_genes]
+# transform diss_tom with a power to make moderately strong connections more visible in the heatmap.
+plot_diss_tom = select_diss_tom^7
+# set diagonal to NA for a nicer plot
+diag(plot_diss_tom) = NA
+```
+
+
+```{r fig.align='center'}
+TOMplot(plot_diss_tom, select_gene_tree, select_module_colors, main = "Network heatmap")
+```
+
+
+# Eigengene visualization {.tabset}
+
+## Eigengene dendrogram
+
+```{r fig.align='center'}
+module_eigengenes = moduleEigengenes(expression_data, module_colors)$eigengenes  # keep only the eigengene table from the result
+plotEigengeneNetworks(module_eigengenes, "Eigengene dendrogram", 
+                      plotHeatmaps = FALSE)  # dendrogram only
+```
+
+## Eigengene adjacency heatmap
+
+```{r fig.align='center'}
+plotEigengeneNetworks(module_eigengenes, "Eigengene adjacency heatmap", 
+                      marHeatmap = c(2, 3, 2, 2),
+                      plotDendrograms = FALSE, xLabelsAngle = 90)  # heatmap only
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_eigengene_visualization.xml	Tue Aug 08 12:35:11 2017 -0400
@@ -0,0 +1,100 @@
+<tool id="wgcna_eigengene_visualization" name="WGCNA: eigengene visualization" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="0.4.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="1.51">r-wgcna</requirement>
+    </requirements>
+    <description>
+        Eigengene visualization.
+    </description>
+    <stdio>
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+        ## Add tools to PATH
+        export PATH=/opt/R-3.2.5/bin:\$PATH &&
+
+        Rscript '${__tool_directory__}/wgcna_eigengene_visualization_render.R'
+
+            ## 1. input data
+            -e $echo
+            -w $construct_network_workspace
+            -p '$soft_threshold_power'
+            -n $plot_genes
+
+
+            ## 2. output report and report site directory
+		    -o $wgcna_eigengene_visualization
+		    -d $wgcna_eigengene_visualization.files_path
+
+		    ## 3. Rmd templates in the tool directory
+
+                -M '${__tool_directory__}/wgcna_eigengene_visualization.Rmd'
+
+
+
+        ]]>
+    </command>
+    <inputs>
+        <param type="data" name="construct_network_workspace" format="rdata" optional="false"
+               label="R workspace from WGCNA: construct network" />
+        <param type="integer" name="soft_threshold_power" optional="true" label="Soft threshold power"
+               help="Refer to the scale independence plot from 'WGCNA: construct network' and choose an optimal soft threshold power.
+               An optimal power will be calculated automatically if no value is provided." />
+        <param type="integer" name="plot_genes" value="400" min="1" label="Number of genes" optional="false"
+               help="The number of genes that will be used. It is possible to speed up the plotting by providing a subset of
+                    genes. However, the gene dendrogram may often look different from the dendrogram of all genes." />
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data name="wgcna_eigengene_visualization" format="html" label="WGCNA: eigengene visualization" />
+    </outputs>
+    <citations>
+        <citation type="bibtex">
+            @article{langfelder2008wgcna,
+            title={WGCNA: an R package for weighted correlation network analysis},
+            author={Langfelder, Peter and Horvath, Steve},
+            journal={BMC bioinformatics},
+            volume={9},
+            number={1},
+            pages={559},
+            year={2008},
+            publisher={BioMed Central}
+            }
+        </citation>
+        <citation type="bibtex">
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        </citation>
+        <citation type="bibtex">
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_eigengene_visualization_render.R	Tue Aug 08 12:35:11 2017 -0400
@@ -0,0 +1,109 @@
+##======= Handle arguments from command line ========
+# setup R error handling: print the message to stderr and exit with status 1
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          quit("no", 1, FALSE)  # save = "no", status = 1, runLast = FALSE (F is reassignable, FALSE is not)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: data type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list=list()
+
+##------- 1. input data ---------------------
+spec_list$ECHO = c('echo', 'e', '1', 'character')
+spec_list$CONSTRUCT_NETWORK_WORKSPACE = c('construct_network_workspace', 'w', '1', 'character')
+spec_list$SOFT_THRESHOLD_POWER = c('soft_threshold_power', 'p', '2', 'double')
+spec_list$PLOT_GENES = c('plot_genes', 'n', '1', 'integer')
+
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML = c('wgcna_eigengene_visualization_html', 'o', '1', 'character')
+spec_list$OUTPUT_DIR = c('wgcna_eigengene_visualization_dir', 'd', '1', 'character')
+
+
+
+##--------3. Rmd templates in the tool directory ----------
+
+spec_list$WGCNA_EIGENGENE_VISUALIZATION_RMD = c('wgcna_eigengene_visualization_rmd', 'M', '1', 'character')
+
+
+
+##------------------------------------------------------------------
+
+spec = t(as.data.frame(spec_list))  # one row per flag, four columns as described above
+opt = getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$echo, opt$plot_genes
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(WGCNA)
+library(DT)
+library(htmltools)
+library(ggplot2)
+
+
+#----- 1. create the report directory (like `mkdir -p`, but without a shell so odd characters in the path are safe) ----
+dir.create(opt$wgcna_eigengene_visualization_dir, recursive = TRUE, showWarnings = FALSE)
+
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+
+#----- 01 wgcna_eigengene_visualization.Rmd: replace each placeholder token with its option value ------
+readLines(opt$wgcna_eigengene_visualization_rmd) %>%  # NOTE(review): %>% is never loaded explicitly - presumably re-exported by an attached package, confirm
+  (function(x) {
+    gsub('ECHO', opt$echo, x)
+  }) %>%
+  (function(x) {
+    gsub('CONSTRUCT_NETWORK_WORKSPACE', opt$construct_network_workspace, x)
+  }) %>%
+  (function(x) {
+    gsub('SOFT_THRESHOLD_POWER', opt$soft_threshold_power, x)  # optional flag; the template guards the substituted value with is.na()
+  }) %>%
+  (function(x) {
+    gsub('PLOT_GENES', opt$plot_genes, x)
+  }) %>%
+  (function(x) {
+    gsub('OUTPUT_DIR', opt$wgcna_eigengene_visualization_dir, x)
+  }) %>%
+  (function(x) {
+    fileConn = file('wgcna_eigengene_visualization.Rmd')  # filled-in copy goes to the working directory
+    writeLines(x, con=fileConn)
+    close(fileConn)
+  })
+
+
+#------ 3. render the filled-in Rmd to the HTML output path --------
+render('wgcna_eigengene_visualization.Rmd', output_file = opt$wgcna_eigengene_visualization_html)
+
+#-------4. manipulate outputs -----------------------------
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_preprocessing.Rmd	Tue Aug 08 12:35:11 2017 -0400
@@ -0,0 +1,76 @@
+---
+title: 'WGCNA: data preprocessing'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+```
+
+```{r}
+str(opt)  # NOTE(review): looks like leftover debugging - prints the parsed options (file paths included) into the report; confirm it is wanted
+```
+
+# Import data
+
+Each row represents a gene and each column represents a sample.
+
+```{r}
+expression_data = read.csv('EXPRESSION_DATA', header = TRUE, row.names = 1)  # first column supplies the row (gene) names
+```
+
+Display the first 100 genes.
+
+```{r}
+datatable(head(expression_data, 100), style="bootstrap", filter = 'top',
+          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+Transpose expression data matrix so that each row represents a sample and each column represents a gene.
+
+```{r}
+expression_data = as.data.frame(t(expression_data))
+```
+
+# Checking data
+
+Checking data for excessive missing values and identification of outlier microarray samples.
+
+```{r}
+# Check samples and genes; anything flagged as not good is reported and then dropped.
+gsg = goodSamplesGenes(expression_data, verbose = 3)
+if (gsg$allOK) {
+  print('all genes are OK!')
+} else {
+  # Report what is about to be removed, then keep only the good rows/columns.
+  if (any(!gsg$goodGenes))
+    printFlush(paste("Removing genes:", paste(names(expression_data)[!gsg$goodGenes], collapse = ", ")))
+  if (any(!gsg$goodSamples))
+    printFlush(paste("Removing samples:", paste(rownames(expression_data)[!gsg$goodSamples], collapse = ", ")))
+  expression_data = expression_data[gsg$goodSamples, gsg$goodGenes]
+}
+```
+
+# Clustering samples
+
+If there are any outliers, choose a height cut that will remove the offending sample. Remember this number since you will need this number in further analysis.
+
+```{r fig.align='center'}
+sampleTree = hclust(dist(expression_data), method = "average");  # average-linkage clustering of the rows (samples)
+plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="",
+     cex.axis = 1, cex.main = 1, cex = 0.5)
+```
+
+
+```{r echo=FALSE}
+rm("opt")  # drop the parsed command-line options so they are not saved
+save(list=ls(all.names = TRUE), file='PREPROCESSING_WORKSPACE')  # every remaining object, hidden names included
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_preprocessing.xml	Tue Aug 08 12:35:11 2017 -0400
@@ -0,0 +1,96 @@
+<tool id="wgcna_preprocessing" name="WGCNA: preprocessing" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="0.4.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="1.51">r-wgcna</requirement>
+    </requirements>
+    <description>
+        Data cleaning and preprocessing.
+    </description>
+    <stdio>
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+        ## Add tools to PATH
+        export PATH=/opt/R-3.2.5/bin:\$PATH &&
+
+        Rscript '${__tool_directory__}/wgcna_preprocessing_render.R'
+
+            ## 1. input data
+            -e $echo
+            -E $expression_data
+
+
+            ## 2. output report and report site directory
+		    -o $wgcna_preprocessing
+		    -d $wgcna_preprocessing.files_path
+		    -w $preprocessing_workspace
+
+		    ## 3. Rmd templates sitting in the tool directory
+
+		        ## _site.yml and index.Rmd template files
+                -D '${__tool_directory__}/wgcna_preprocessing.Rmd'
+
+
+
+        ]]>
+    </command>
+    <inputs>
+        <param type="data" name="expression_data" format="csv" optional="false" label="Gene expression data"
+               help="Each row represents a gene and each column represents a sample."/>
+
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data name="wgcna_preprocessing" format="html" label="WGCNA: preprocessing" />
+        <data name="preprocessing_workspace" format="rdata" label="R workspace: WGCNA preprocessing" />
+    </outputs>
+    <citations>
+        <citation type="bibtex">
+            @article{langfelder2008wgcna,
+            title={WGCNA: an R package for weighted correlation network analysis},
+            author={Langfelder, Peter and Horvath, Steve},
+            journal={BMC bioinformatics},
+            volume={9},
+            number={1},
+            pages={559},
+            year={2008},
+            publisher={BioMed Central}
+            }
+        </citation>
+        <citation type="bibtex">
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        </citation>
+        <citation type="bibtex">
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wgcna_preprocessing_render.R	Tue Aug 08 12:35:11 2017 -0400
@@ -0,0 +1,102 @@
+##======= Handle arguments from command line ========
+# setup R error handling: print the message to stderr and exit with status 1
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          quit("no", 1, FALSE)  # save = "no", status = 1, runLast = FALSE (F is reassignable, FALSE is not)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: data type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list=list()
+
+##------- 1. input data ---------------------
+spec_list$ECHO = c('echo', 'e', '1', 'character')
+spec_list$EXPRESSION_DATA = c('expression_data', 'E', '1', 'character')
+
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML = c('wgcna_preprocessing_html', 'o', '1', 'character')
+spec_list$OUTPUT_DIR = c('wgcna_preprocessing_dir', 'd', '1', 'character')
+spec_list$PREPROCESSING_WORKSPACE = c('preprocessing_workspace', 'w', '1', 'character')
+
+##--------3. Rmd templates sitting in the tool directory ----------
+
+spec_list$WGCNA_PREPROCESSING_RMD = c('wgcna_preprocessing_rmd', 'D', '1', 'character')
+
+
+
+##------------------------------------------------------------------
+
+spec = t(as.data.frame(spec_list))  # one row per flag, four columns as described above
+opt = getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$echo, opt$expression_data
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(WGCNA)
+library(DT)
+library(htmltools)
+
+
+#----- 1. create the report directory (like `mkdir -p`, but without a shell so odd characters in the path are safe) ----
+dir.create(opt$wgcna_preprocessing_dir, recursive = TRUE, showWarnings = FALSE)
+
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+
+#----- 01 wgcna_preprocessing.Rmd: replace each placeholder token with its option value ------
+readLines(opt$wgcna_preprocessing_rmd) %>%  # NOTE(review): %>% is never loaded explicitly - presumably re-exported by an attached package, confirm
+  (function(x) {
+    gsub('ECHO', opt$echo, x)
+  }) %>%
+  (function(x) {
+    gsub('EXPRESSION_DATA', opt$expression_data, x)
+  }) %>%
+  (function(x) {
+    gsub('OUTPUT_DIR', opt$wgcna_preprocessing_dir, x)
+  }) %>%
+  (function(x) {
+    gsub('PREPROCESSING_WORKSPACE', opt$preprocessing_workspace, x)
+  }) %>%
+  (function(x) {
+    fileConn = file('wgcna_preprocessing.Rmd')  # filled-in copy goes to the working directory
+    writeLines(x, con=fileConn)
+    close(fileConn)
+  })
+
+
+#------ 3. render the filled-in Rmd to the HTML output path --------
+render('wgcna_preprocessing.Rmd', output_file = opt$wgcna_preprocessing_html)
+
+#-------4. manipulate outputs -----------------------------
+
+