Mercurial > repos > mingchen0919 > rmarkdown_fastqc_site

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/01_evaluation_overview.Rmd	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,123 @@
+---
+title: "Evaluation Overview"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)  # uppercase token is a placeholder that fastqc_site_render.R replaces with the -e value (TRUE/FALSE)
+```
+
+```{bash 'copy data from datasets directory to working directory', echo=FALSE}
+# Copy uploaded data to the working directory; split on commas only and quote
+# every path so that whitespace or glob characters in a path cannot break it up
+IFS=',' read -r -a inputs <<< "READS"
+for f in "${inputs[@]}"; do
+    cp "$f" ./
+done
+```
+
+```{bash 'run fastqc', echo=FALSE}
+# run fastqc and place outputs into the report directory
+for r in *.dat; do
+    [ -e "$r" ] || continue  # an unmatched glob stays literal; skip it instead of parsing ls output
+    fastqc -o "REPORT_OUTPUT_DIR" "$r" > /dev/null 2>&1
+done
+```
+
+```{bash 'parse fastqc results', echo=FALSE}
+##==== copy fastqc generated zip files from report output directory to job work directory ==
+cp -r "REPORT_OUTPUT_DIR"/*zip ./
+
+# create one index file per module (header row only); the loop below appends one line per sample
+echo "sample_id,file_path" > PWF_file_paths.txt # Pass, Warning, Fail
+echo "sample_id,file_path" > PBQS_file_paths.txt # Per Base Quality Score
+echo "sample_id,file_path" > PSQS_file_paths.txt # Per Sequence Quality Score
+echo "sample_id,file_path" > PSGC_file_paths.txt # Per Sequence GC Content
+echo "sample_id,file_path" > PBSC_file_paths.txt # Per Base Sequence Content
+echo "sample_id,file_path" > PBNC_file_paths.txt # Per Base N Content
+echo "sample_id,file_path" > SDL_file_paths.txt # Sequence Duplication Level
+echo "sample_id,file_path" > SLD_file_paths.txt # Sequence Length Distribution
+echo "sample_id,file_path" > KMC_file_paths.txt # Kmer Content
+
+for i in *.zip; do
+    [ -e "$i" ] || continue  # an unmatched glob stays literal; skip it instead of parsing ls output
+    BASE="${i%.zip}"         # archive name without the .zip suffix; used as the sample id
+    echo "$BASE"
+    unzip "${BASE}.zip" > /dev/null 2>&1
+
+    ##====== pass,warning,fail (PWF): every ">>" module header except the END_MODULE markers =====
+    awk '/^>>/ {print}' "$BASE"/fastqc_data.txt | grep -v 'END_MODULE' | sed 's/>>//' > "$BASE"-PWF.txt
+    echo "${BASE},${BASE}-PWF.txt" >> PWF_file_paths.txt
+
+    ##====== per base quality scores (PBQS): the lines between the module header and END_MODULE ======
+    awk '/^>>Per base sequence quality/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-PBQS.txt
+    echo "${BASE},${BASE}-PBQS.txt" >> PBQS_file_paths.txt
+
+    ##====== per sequence quality scores (PSQS)
+    awk '/^>>Per sequence quality scores/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-PSQS.txt
+    echo "${BASE},${BASE}-PSQS.txt" >> PSQS_file_paths.txt
+
+    ##====== Per sequence GC content (PSGC)
+    awk '/^>>Per sequence GC content/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-PSGC.txt
+    echo "${BASE},${BASE}-PSGC.txt" >> PSGC_file_paths.txt
+
+    ##====== Per Base Sequence Content (PBSC)
+    awk '/^>>Per base sequence content/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-PBSC.txt
+    echo "${BASE},${BASE}-PBSC.txt" >> PBSC_file_paths.txt
+
+    ##====== Per Base N Content (PBNC)
+    awk '/^>>Per base N content/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-PBNC.txt
+    echo "${BASE},${BASE}-PBNC.txt" >> PBNC_file_paths.txt
+
+    ##====== Sequence Duplication Level (SDL)
+    awk '/^>>Sequence Duplication Levels/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-SDL.txt
+    echo "${BASE},${BASE}-SDL.txt" >> SDL_file_paths.txt
+
+    ##====== Sequence Length Distribution (SLD)
+    awk '/^>>Sequence Length Distribution/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-SLD.txt
+    echo "${BASE},${BASE}-SLD.txt" >> SLD_file_paths.txt
+
+    ##====== Kmer Content ============
+    awk '/^>>Kmer Content/ {flag=1; next} /END_MODULE/ {flag=0} flag' "$BASE"/fastqc_data.txt >"$BASE"-KMC.txt
+    echo "${BASE},${BASE}-KMC.txt" >> KMC_file_paths.txt
+
+done
+```
+
+
+## Evaluation Overview
+
+```{r 'overview'}
+# Build one wide table: an 'item' column followed by one status column per sample.
+PWF_file_paths = read.csv('PWF_file_paths.txt',
+                           header = TRUE, stringsAsFactors = FALSE)
+PWF_df = NULL  # explicit empty marker; rm()/exists() warn on a missing object and also see outer scopes
+for (i in seq_len(nrow(PWF_file_paths))) {  # seq_len(): zero iterations when no sample is listed
+  pwf_df = read.csv(PWF_file_paths[i,2],
+                     sep='\t', header=FALSE, stringsAsFactors = FALSE)
+  colnames(pwf_df) = c('item', PWF_file_paths[i,1])
+  if (is.null(PWF_df)) {
+    PWF_df = pwf_df
+  } else {
+    PWF_df = cbind(PWF_df, pwf_df[,2,drop=FALSE])
+  }
+}
+```
+
+
+```{r}
+# Colour each per-sample status cell: green for 'pass', red for 'fail', orange otherwise.
+my_icon = c(pass = 'ok', fail = 'remove', warn = 'star')
+sample_columns = colnames(PWF_df)[-1]
+evaluate_list = lapply(setNames(sample_columns, sample_columns), function(column_name) {
+  formatter(
+      "span",
+      style = x ~ style("background-color" = ifelse(x =='pass', '#9CD027', ifelse(x == 'fail', '#CC0000', '#FF4E00')),
+                        "color" = "white",
+                        "width" = "50px",
+                        "float" = "left",
+                        "padding-right" = "5px")
+    )
+})
+
+formattable(PWF_df, evaluate_list)
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/02_fastqc_original_reports.Rmd	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,20 @@
+---
+title: "FastQC original reports"
+output: html_document
+---
+
+```{r 'FastQC original reports', include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)  # uppercase token is a placeholder that fastqc_site_render.R replaces with the -e value (TRUE/FALSE)
+```
+
+
+Below are links to the original ***FastQC*** HTML reports.
+
+```{r 'html report links'}
+# Link every FastQC HTML report found in the output directory as a bulleted list.
+# The regex is anchored: the former '.*html' matched any name merely containing "html".
+html_files = list.files('REPORT_OUTPUT_DIR', pattern = '\\.html$')
+html_report_list = lapply(html_files, function(f) tags$li(tags$a(href = f, f)))
+names(html_report_list) = html_files
+tags$ul(html_report_list)
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/1_per_base_quality_scores.Rmd	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,62 @@
+---
+title: "Per Base Quality Scores"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)  # uppercase token is a placeholder that fastqc_site_render.R replaces with the -e value (TRUE/FALSE)
+```
+
+
+## Per Base Quality Scores
+
+```{r}
+# Stack every sample's per-base quality table into one long data frame (PBQS_df).
+PBQS_df = data.frame()
+PBQS_file_paths = read.csv('PBQS_file_paths.txt',
+                           header = TRUE, stringsAsFactors = FALSE)
+for (i in seq_len(nrow(PBQS_file_paths))) {  # seq_len(): zero iterations for an empty list
+  file_path = PBQS_file_paths[i,2]
+  pbqs_df = read.csv(file_path,
+                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
+    # split X.Base on '-' (Base1 = before, Base2 = after, NA if absent); each part becomes its own row
+    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
+           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
+  (function (df) {
+    df1 = select(df, -Base2)
+    df2 = select(df, -Base1) %>% filter(!is.na(Base2))  # Base2 is numeric, so test for NA, not ''
+    colnames(df1)[ncol(df1)] = 'Base'  # the mutate()d column is last, whatever the table width
+    colnames(df2)[ncol(df2)] = 'Base'
+    rbind(df1, df2) %>% arrange(Base)
+  })
+  pbqs_df$sample_id = rep(PBQS_file_paths[i,1], nrow(pbqs_df))
+  PBQS_df = rbind(PBQS_df, pbqs_df)
+}
+```
+
+
+```{r}
+# Mean quality per base position, one line per sample, with two y-axis reference lines.
+max_phred = max(PBQS_df$Mean) + 10  # headroom above the highest mean
+
+# y-axis marker at 27 (dashed, green)
+phred_27_line = list(label = list(text = "Phred Score = 27"),
+                     width = 2,
+                     dashStyle = "dash",
+                     color = "green",
+                     value = 27)
+# y-axis marker at 20 (red, no dashStyle set)
+phred_20_line = list(label = list(text = "Phred Score = 20"),
+                     width = 2,
+                     color = "red",
+                     value = 20)
+
+pbqs_chart = hchart(PBQS_df, "line", hcaes(x = Base, y = Mean, group = sample_id))
+pbqs_chart = hc_title(pbqs_chart, text = "Per Base Quality Score")
+pbqs_chart = hc_yAxis(pbqs_chart,
+                      title = list(text = "Mean Base Quality Score"),
+                      min = 0,
+                      max = max_phred,
+                      plotLines = list(phred_27_line, phred_20_line))
+hc_exporting(pbqs_chart, enabled = TRUE)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/2_per_base_N_content.Rmd	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,58 @@
+---
+title: "Per Base N Content"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)  # uppercase token is a placeholder that fastqc_site_render.R replaces with the -e value (TRUE/FALSE)
+```
+
+## Per Base N Content
+
+```{r}
+# Stack every sample's per-base N content table into one long data frame (PBNC_df).
+PBNC_df = data.frame()
+PBNC_file_paths = read.csv('PBNC_file_paths.txt',
+                           header = TRUE, stringsAsFactors = FALSE)
+for (i in seq_len(nrow(PBNC_file_paths))) {  # seq_len(): zero iterations for an empty list
+  file_path = PBNC_file_paths[i,2]
+  pbnc_df = read.csv(file_path,
+                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
+    # split X.Base on '-' (Base1 = before, Base2 = after, NA if absent); each part becomes its own row
+    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
+           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
+  (function (df) {
+    df1 = select(df, -Base2)
+    df2 = select(df, -Base1) %>% filter(!is.na(Base2))  # Base2 is numeric, so test for NA, not ''
+    colnames(df1)[ncol(df1)] = 'Base'  # the mutate()d column is last, whatever the table width
+    colnames(df2)[ncol(df2)] = 'Base'
+    rbind(df1, df2) %>% arrange(Base)
+  })
+  pbnc_df$sample_id = rep(PBNC_file_paths[i,1], nrow(pbnc_df))
+  PBNC_df = rbind(PBNC_df, pbnc_df)
+}
+```
+
+
+```{r}
+# N content per base position, one line per sample, with a dashed reference line at 5.
+PBNC_df$N.Count = PBNC_df$N.Count * 100  # NOTE(review): confirm FastQC does not already report N-Count as a percentage
+hchart(PBNC_df, "line", hcaes(x = as.character(Base), y = N.Count, group = sample_id)) %>%
+  hc_title(
+    text = "Per Base N Content"
+  ) %>%
+  hc_xAxis(
+    title = list(text = "Base Position")
+  ) %>%
+  hc_yAxis(
+    title = list(text = "N %"),
+    plotLines = list(
+      list(label = list(text = "N = 5%"),
+           width = 2,
+           dashStyle = "dash",
+           color = "red",
+           value = 5)
+    )
+  ) %>%
+  hc_exporting(enabled = TRUE)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/3_per_sequence_quality_scores.Rmd	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,50 @@
+---
+title: "Per Sequence Quality Scores"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)  # uppercase token is a placeholder that fastqc_site_render.R replaces with the -e value (TRUE/FALSE)
+```
+
+## Per Sequence Quality Scores
+
+```{r}
+# Stack every sample's per-sequence quality table into one long data frame (PSQS_df).
+PSQS_df = data.frame()
+PSQS_file_paths = read.csv('PSQS_file_paths.txt',
+                           header = TRUE, stringsAsFactors = FALSE)
+for (i in seq_len(nrow(PSQS_file_paths))) {  # seq_len(): zero iterations for an empty list
+  file_path = PSQS_file_paths[i,2]
+  psqs_df = read.csv(file_path,
+                     sep='\t', header=TRUE, stringsAsFactors = FALSE)
+  psqs_df$sample_id = rep(PSQS_file_paths[i,1], nrow(psqs_df))
+  PSQS_df = rbind(PSQS_df, psqs_df)
+}
+```
+
+
+```{r}
+# Count per sequence quality value, one line per sample, with two x-axis reference lines.
+max_phred = max(PSQS_df$X.Quality) + 5  # headroom beyond the highest quality value
+# x-axis marker at 27 (dashed, green)
+phred_27_line = list(label = list(text = "Phred Score = 27"),
+                     width = 2,
+                     dashStyle = "dash",
+                     color = "green",
+                     value = 27)
+# x-axis marker at 20 (red, no dashStyle set)
+phred_20_line = list(label = list(text = "Phred Score = 20"),
+                     width = 2,
+                     color = "red",
+                     value = 20)
+
+psqs_chart = hchart(PSQS_df, "line", hcaes(x = X.Quality, y = Count, group = sample_id))
+psqs_chart = hc_title(psqs_chart, text = "Per Sequence Quality Score")
+psqs_chart = hc_xAxis(psqs_chart,
+                      title = list(text = "Mean Sequence Quality Score"),
+                      min = 0,
+                      max = max_phred,
+                      plotLines = list(phred_27_line, phred_20_line))
+hc_exporting(psqs_chart, enabled = TRUE)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/4_per_sequence_GC_content.Rmd	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,38 @@
+---
+title: "Per Sequence GC Content"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)  # uppercase token is a placeholder that fastqc_site_render.R replaces with the -e value (TRUE/FALSE)
+```
+
+## Per Sequence GC Content
+
+
+```{r}
+# Stack every sample's per-sequence GC content table into one long data frame (PSGC_df).
+PSGC_df = data.frame()
+PSGC_file_paths = read.csv('PSGC_file_paths.txt',
+                           header = TRUE, stringsAsFactors = FALSE)
+for (i in seq_len(nrow(PSGC_file_paths))) {  # seq_len(): zero iterations for an empty list
+  file_path = PSGC_file_paths[i,2]
+  psgc_df = read.csv(file_path,
+                     sep='\t', header=TRUE, stringsAsFactors = FALSE)
+  psgc_df$sample_id = rep(PSGC_file_paths[i,1], nrow(psgc_df))
+  PSGC_df = rbind(PSGC_df, psgc_df)
+}
+```
+
+
+```{r}
+# Plot Count against X.GC.Content, one line per sample (the unused max_phred value was dropped).
+hchart(PSGC_df, "line", hcaes(x = X.GC.Content, y = Count, group = sample_id)) %>%
+  hc_title(
+    text = "Per Sequence GC Content"
+  ) %>%
+  hc_xAxis(
+    title = list(text = "% GC")
+  ) %>%
+  hc_exporting(enabled = TRUE)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/5_per_base_sequence_content.Rmd	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,45 @@
+---
+title: "Per Base Sequence Content"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)  # uppercase token is a placeholder that fastqc_site_render.R replaces with the -e value (TRUE/FALSE)
+```
+
+## Per Base Sequence Content
+
+```{r}
+# Stack every sample's per-base sequence content table into one long data frame (PBSC_df).
+PBSC_df = data.frame()
+PBSC_file_paths = read.csv('PBSC_file_paths.txt',
+                           header = TRUE, stringsAsFactors = FALSE)
+for (i in seq_len(nrow(PBSC_file_paths))) {  # seq_len(): zero iterations for an empty list
+  file_path = PBSC_file_paths[i,2]
+  pbsc_df = read.csv(file_path,
+                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
+    # split X.Base on '-' (Base1 = before, Base2 = after, NA if absent); each part becomes its own row
+    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
+           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
+  (function (df) {
+    df1 = select(df, -Base2)
+    df2 = select(df, -Base1) %>% filter(!is.na(Base2))  # Base2 is numeric, so test for NA, not ''
+    colnames(df1)[ncol(df1)] = 'Base'  # the mutate()d column is last, whatever the table width
+    colnames(df2)[ncol(df2)] = 'Base'
+    rbind(df1, df2) %>% arrange(Base)
+  })
+  pbsc_df$sample_id = rep(PBSC_file_paths[i,1], nrow(pbsc_df))
+  PBSC_df = rbind(PBSC_df, pbsc_df)
+}
+```
+
+
+```{r out.width="100%"}
+# Long format: one row per (Base, sample_id, measured column); one facet per sample.
+PBSC_df_2 = melt(select(PBSC_df, -X.Base),
+                 id = c('Base', 'sample_id'), value.name = 'base_percentage')
+line_aes = aes(x = Base, y = base_percentage, group = variable, color = variable)
+p = ggplot(data = PBSC_df_2, line_aes) + geom_line() + facet_wrap(~ sample_id)
+ggplotly(p)
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/_site.yml	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,29 @@
+name: "FastQC Website"
+output_dir: "my_site"  # fastqc_site_render.R copies my_site/index.html and my_site/* to the tool outputs
+navbar:
+    title: "FastQC"
+    type: inverse
+    left:  # each href below shares its base name with one of the Rmd templates in this tool
+        - text: "Home"
+          icon: fa-home
+          href: index.html
+        - text: "Evaluation Overview"
+          href: 01_evaluation_overview.html
+        - text: "Evaluation Items"
+          menu:  # drop-down with one entry per FastQC module page
+            - text: "Per Base Quality Scores"
+              href: 1_per_base_quality_scores.html
+            - text: "Per Base N Content"
+              href: 2_per_base_N_content.html
+            - text: "Per Sequence Quality Scores"
+              href: 3_per_sequence_quality_scores.html
+            - text: "Per Sequence GC Content"
+              href: 4_per_sequence_GC_content.html
+            - text: "Per Base Sequence Content"
+              href: 5_per_base_sequence_content.html
+        - text: "Original FastQC Reports"
+          href: 02_fastqc_original_reports.html
+output:  # html_document options shared by every page of the site
+  html_document:
+    theme: cosmo
+    highlight: textmate
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqc_site.xml	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,125 @@
+<tool id="fastqc_site" name="Fastqc Site" version="1.0.0">
+    <requirements> <!-- NOTE(review): fastqc_site_render.R also calls library(dplyr) and library(Kmisc), which are not listed here, and never loads DESeq2; confirm this list -->
+        <requirement type="package" version="1.15.2">pandoc</requirement>
+        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.1.0">r-stringr</requirement>
+        <requirement type="package" version="0.5.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="1.4.2">r-reshape2</requirement>
+        <requirement type="package" version="4.5.6">r-plotly</requirement>
+        <requirement type="package" version="0.2.0.1">r-formattable</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="0.11.5">fastqc</requirement>
+    </requirements>
+    <description>
+        Runs FastQC and displays the results as an R Markdown website.
+    </description>
+    <stdio> <!-- any of these patterns on stdout or stderr marks the job as failed (level fatal) -->
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+
+        Rscript '${__tool_directory__}/fastqc_site_render.R'
+
+            ## 1. input data (quoted so each value reaches Rscript as a single argument)
+            -r '$reads'
+            -e '$echo'
+
+            ## 2. output report and report site directory
+            -o '$fastqc_site'
+            -d '$fastqc_site.files_path'
+
+            ## 3. Rmd templates sitting in the tool directory
+
+                ## _site.yml and index.Rmd template files
+                -s '${__tool_directory__}/_site.yml'
+                -i '${__tool_directory__}/index.Rmd'
+
+                ## other Rmd body template files
+                -p  '${__tool_directory__}/01_evaluation_overview.Rmd'
+                -a  '${__tool_directory__}/02_fastqc_original_reports.Rmd'
+                -b  '${__tool_directory__}/1_per_base_quality_scores.Rmd'
+                -c  '${__tool_directory__}/2_per_base_N_content.Rmd'
+                -f  '${__tool_directory__}/3_per_sequence_quality_scores.Rmd'
+                -g  '${__tool_directory__}/4_per_sequence_GC_content.Rmd'
+                -h  '${__tool_directory__}/5_per_base_sequence_content.Rmd'
+
+        ]]>
+    </command>
+    <inputs> <!-- the command block passes reads as -r and echo as -e to the render script -->
+        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" multiple="true" name="reads" type="data" label="Short reads data from history" />
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs> <!-- the command block passes fastqc_site as -o and its files_path as -d -->
+        <data format="html" name="fastqc_site" label="fastqc site" />
+    </outputs>
+    <citations>
+        <citation type="bibtex">
+            @misc{bioinformatics2014fastqc,
+            title={FastQC},
+            author={Bioinformatics, Babraham},
+            year={2014}
+            }
+        </citation>
+        <citation type="bibtex">
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        </citation>
+        <citation type="bibtex">
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        </citation>
+        <citation type="bibtex">
+            @Manual{,
+            title = {plotly: Create Interactive Web Graphics via 'plotly.js'},
+            author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy},
+            year = {2017},
+            note = {R package version 4.6.0},
+            url = {https://CRAN.R-project.org/package=plotly},
+            }
+        </citation>
+        <citation type="bibtex">
+            @Manual{,
+            title = {highcharter: A Wrapper for the 'Highcharts' Library},
+            author = {Joshua Kunst},
+            year = {2017},
+            note = {R package version 0.5.0},
+            url = {https://CRAN.R-project.org/package=highcharter},
+            }
+        </citation>
+        <citation type="bibtex">
+            @Manual{,
+            title = {formattable: Create 'Formattable' Data Structures},
+            author = {Kun Ren and Kenton Russell},
+            year = {2016},
+            note = {R package version 0.2.0.1},
+            url = {https://CRAN.R-project.org/package=formattable},
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqc_site_render.R	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,196 @@
+##======= Handle arguments from command line ========
+# set up R error handling: print the error message to stderr and exit with status 1
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          quit(save = "no", status = 1, runLast = FALSE)  # spelled-out FALSE: the alias F can be reassigned
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warnings for the whole run
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# Each getopt spec row holds four fields:
+#   1. long flag name (also the element name used on `opt`)
+#   2. short flag alias, a SINGLE character
+#   3. argument mask: 0 = no argument, 1 = argument required, 2 = argument optional
+#   4. data type the flag's argument is cast to:
+#      logical, integer, double, complex or character
+spec_list = list(
+
+  ##------- 1. input data ---------------------
+  READS = c('reads', 'r', '1', 'character'),
+  ECHO = c('echo', 'e', '1', 'character'),
+
+  ##--------2. output report and report site directory --------------
+  FASTQC_SITE = c('fastqc_site', 'o', '1', 'character'),
+  FASTQC_SITE_DIR = c('fastqc_site_dir', 'd', '1', 'character'),
+
+  ##--------3. Rmd templates sitting in the tool directory ----------
+
+  ## _site.yml and index.Rmd files
+  SITE_YML = c('site_yml', 's', '1', 'character'),
+  INDEX_Rmd = c('index_rmd', 'i', '1', 'character'),
+
+  ## other Rmd body template files
+  x01 = c('x01_evaluation_overview', 'p', '1', 'character'),
+  x02 = c('x02_fastqc_original_reports', 'a', '1', 'character'),
+  x1 = c('x1_per_base_quality_scores', 'b', '1', 'character'),
+  x2 = c('x2_per_base_N_content', 'c', '1', 'character'),
+  x3 = c('x3_per_sequence_quality_scores', 'f', '1', 'character'),
+  x4 = c('x4_per_sequence_GC_content', 'g', '1', 'character'),
+  x5 = c('x5_per_base_sequence_content', 'h', '1', 'character')
+)
+
+##------------------------------------------------------------------
+
+# transpose so that every flag becomes one row of the spec matrix
+spec = t(as.data.frame(spec_list))
+opt = getopt(spec)
+# parsed values are read by long flag name (field 1 of each spec row),
+#                        NOT by element name in the spec_list
+# example: opt$reads, opt$fastqc_site_dir
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(plyr)
+library(stringr)
+library(dplyr)
+library(highcharter)
+library(DT)
+library(reshape2)
+library(Kmisc)
+library(plotly)
+library(formattable)
+library(htmltools)
+
+
+#----- 1. create the report directory ------------------------
+# dir.create() replaces a pasted `mkdir -p` shell command, so the path needs no quoting
+dir.create(opt$fastqc_site_dir, recursive = TRUE, showWarnings = FALSE)
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+
+    #----- Copy index.Rmd and _site.yml files to job working directory -----
+    file.copy(opt$index_rmd, 'index.Rmd', overwrite = TRUE)  # explicit; recursive=TRUE only implied it
+    file.copy(opt$site_yml, '_site.yml', overwrite = TRUE)
+    #---------------------------------------------------------
+
+    #----- helper: fill one Rmd template ---------------------
+    # Read a template, replace its placeholder tokens and write the result into
+    # the job working directory.
+    #
+    #   template_path  path of the Rmd template in the tool directory
+    #   output_name    file name to create in the working directory
+    #   placeholders   named character vector; each name is a literal token in
+    #                  the template and each value is the text that replaces it
+    #
+    # Tokens are applied in the order given. Matching uses fixed = TRUE so the
+    # replacement text is inserted verbatim; with regex matching a backslash in
+    # a substituted path would be read as an escape and silently altered.
+    # An unnamed or empty placeholders vector leaves the template unchanged.
+    # Returns output_name invisibly.
+    fill_template = function(template_path, output_name, placeholders) {
+      template_lines = readLines(template_path)
+      for (token in names(placeholders)) {
+        template_lines = gsub(token, placeholders[[token]], template_lines,
+                              fixed = TRUE)
+      }
+      # writeLines() opens and closes the file itself when given a file name
+      writeLines(template_lines, con = output_name)
+      invisible(output_name)
+    }
+
+    # Substitution set shared by every template. The name is the literal token
+    # used in the templates; the value is whatever was passed with -e.
+    echo_only = c(ECHO = opt$echo)
+
+    #----- 01_evaluation_overview.Rmd -----------------------
+    # also receives the input read paths and the report output directory
+    fill_template(
+      opt$x01_evaluation_overview,
+      '01_evaluation_overview.Rmd',
+      c(echo_only, READS = opt$reads, REPORT_OUTPUT_DIR = opt$fastqc_site_dir)
+    )
+
+    #----- 1_per_base_quality_scores.Rmd --------------------
+    # only the code-display flag is substituted
+    fill_template(
+      opt$x1_per_base_quality_scores,
+      '1_per_base_quality_scores.Rmd',
+      echo_only
+    )
+
+    #----- 2_per_base_N_content.Rmd -------------------------
+    # only the code-display flag is substituted
+    fill_template(
+      opt$x2_per_base_N_content,
+      '2_per_base_N_content.Rmd',
+      echo_only
+    )
+
+    #----- 3_per_sequence_quality_scores.Rmd ----------------
+    # only the code-display flag is substituted
+    fill_template(
+      opt$x3_per_sequence_quality_scores,
+      '3_per_sequence_quality_scores.Rmd',
+      echo_only
+    )
+
+
+    #----- 4_per_sequence_GC_content.Rmd --------------------
+    # only the code-display flag is substituted
+    fill_template(
+      opt$x4_per_sequence_GC_content,
+      '4_per_sequence_GC_content.Rmd',
+      echo_only
+    )
+
+
+    #----- 5_per_base_sequence_content.Rmd ------------------
+    # only the code-display flag is substituted
+    fill_template(
+      opt$x5_per_base_sequence_content,
+      '5_per_base_sequence_content.Rmd',
+      echo_only
+    )
+
+    #----- 02_fastqc_original_reports.Rmd -------------------
+    # also receives the report output directory
+    fill_template(
+      opt$x02_fastqc_original_reports,
+      '02_fastqc_original_reports.Rmd',
+      c(echo_only, REPORT_OUTPUT_DIR = opt$fastqc_site_dir)
+    )
+
+
+
+#------ 3. render all Rmd files with render_site() --------
+render_site()
+
+#-------4. manipulate outputs -----------------------------
+#   a. copy index.html to the report output path, overwriting any existing file
+#   b. copy everything in 'my_site' into the report output directory
+file.copy('my_site/index.html', opt$fastqc_site, overwrite = TRUE)
+# file.copy() instead of a pasted `cp -r my_site/*` command: no shell, so the path needs no quoting
+invisible(file.copy(list.files('my_site', full.names = TRUE), opt$fastqc_site_dir,
+                    recursive = TRUE, overwrite = TRUE))
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/index.Rmd	Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,20 @@
+---
+title: "FastQC Report"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = TRUE)  # fixed value: fastqc_site_render.R copies index.Rmd without placeholder substitution
+```
+
+
+
+## References
+
+* Andrews, Simon. "FastQC: a quality control tool for high throughput sequence data." (2010): 175-176.
+* Goecks, Jeremy, Anton Nekrutenko, and James Taylor. "Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences." Genome biology 11.8 (2010): R86.
+* Afgan, Enis, et al. "The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2016 update." Nucleic acids research (2016): gkw343.
+* Highcharts. https://www.highcharts.com/. (accessed May 26, 2017).
+* R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.
+* Joshua Kunst (2017). highcharter: A Wrapper for the 'Highcharts' Library. R package version 0.5.0. https://CRAN.R-project.org/package=highcharter
+* Carson Sievert, Chris Parmer, Toby Hocking, Scott Chamberlain, Karthik Ram, Marianne Corvellec and Pedro Despouy (2017). plotly: Create Interactive Web Graphics via 'plotly.js'. R package version 4.6.0. https://CRAN.R-project.org/package=plotly