Mercurial > repos > mingchen0919 > aurora_fastqc_site

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/01_evaluation_overview.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,95 @@
+---
+title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+# Run FastQC
+
+```{bash eval=TRUE,echo=FALSE}
+cd ${X_d}
+cp ${X_r} ${X_d}/read_1.fq
+cp ${X_R} ${X_d}/read_2.fq
+
+cat >temp.sh <<EOL
+fastqc \\
+  -q \\
+  -c ${X_c} \\
+  -l ${X_l} \\
+  ${X_d}/read_1.fq > /dev/null 2>&1
+
+fastqc \\
+  -q \\
+  -c ${X_c} \\
+  -l ${X_l} \\
+  ${X_d}/read_2.fq > /dev/null 2>&1
+EOL
+
+grep -v None temp.sh > fastqc.sh
+
+# run fastqc
+sh fastqc.sh
+
+# unzip outputs
+unzip -q read_1_fastqc.zip
+unzip -q read_2_fastqc.zip
+```
+
+```{r}
+# display fastqc job script
+fastqc_sh = paste0(opt$X_d, '/fastqc.sh')
+tags$code(tags$pre(readChar(fastqc_sh, file.info(fastqc_sh)$size )))
+```
+
+# Raw FastQC reports
+
+## Before trimming
+```{r eval=TRUE}
+ori_html = tags$a(href = 'read_1_fastqc/fastqc_report.html', opt$X_n)
+ori_fastqc_data = tags$a(href = 'read_1_fastqc/fastqc_data.txt', 'fastqc_data.txt')
+ori_summary = tags$a(href = 'read_1_fastqc/summary.txt', 'summary.txt')
+tags$ul(
+    tags$li(ori_html),
+    tags$li(ori_fastqc_data),
+    tags$li(ori_summary)
+  )
+```
+
+## After trimming
+```{r eval=TRUE}
+ori_html = tags$a(href = 'read_2_fastqc/fastqc_report.html', opt$X_n)
+ori_fastqc_data = tags$a(href = 'read_2_fastqc/fastqc_data.txt', 'fastqc_data.txt')
+ori_summary = tags$a(href = 'read_2_fastqc/summary.txt', 'summary.txt')
+tags$ul(
+    tags$li(ori_html),
+    tags$li(ori_fastqc_data),
+    tags$li(ori_summary)
+  )
+```
+
+
+# Fastqc Output Visualization
+
+## Overview
+
+```{r eval=TRUE}
+read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 2:1]
+read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 1]
+combined_summary = cbind(read_1_summary, read_2_summary)
+names(combined_summary) = c('MODULE', paste0(opt$X_n, '(before)'), paste0(opt$X_N, '(after)'))
+combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
+combined_summary[combined_summary == 'WARN'] = 'WARN (!)'
+knitr::kable(combined_summary)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/02_per_base_sequence_quality.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,62 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+
+```{r 'function definition', echo=FALSE}
+# Define a function to extract outputs for each module from fastqc output
+extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
+  f = readLines(fastqc_data)
+  start_line = grep(module_name, f)
+  end_module_lines = grep('END_MODULE', f)
+  end_line = end_module_lines[which(end_module_lines > start_line)[1]]
+  module_data = f[(start_line+1):(end_line-1)]
+  writeLines(module_data, '/tmp/temp.txt')
+  read.csv('/tmp/temp.txt', sep = '\t', header = header, comment.char = comment.char)
+}
+```
+
+# Per base sequence quality
+
+```{r 'per base sequence quality', fig.width=10}
+## reads 1
+pbsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence quality')
+pbsq_1$id = 1:length(pbsq_1$X.Base)
+pbsq_1$trim = 'before'
+
+## reads 2
+pbsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence quality')
+pbsq_2$id = 1:length(pbsq_2$X.Base)
+pbsq_2$trim = 'after'
+
+comb_pbsq = rbind(pbsq_1, pbsq_2)
+comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim)
+
+p = ggplot(data = comb_pbsq) +
+  geom_boxplot(mapping = aes(x = id,
+                             lower = Lower.Quartile,
+                             upper = Upper.Quartile,
+                             middle = Median,
+                             ymin = X10th.Percentile,
+                             ymax = X90th.Percentile,
+                             fill = "yellow"),
+               stat = 'identity') +
+  geom_line(mapping = aes(x = id, y = Mean, color = "red")) +
+  scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
+  scale_fill_identity() +
+  scale_color_identity() +
+  ylim(0, max(comb_pbsq$Upper.Quartile) + 5) +
+  xlab('Position in read (bp)') +
+  facet_grid(. ~ trim) +
+  theme(axis.text.x = element_text(angle=45))
+p
+
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/03_per_tile_sequence_quality.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,44 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+# Per tile sequence quality
+
+```{r 'per tile sequence quality', fig.width=10}
+## check if 'per tile sequence quality' module exits or not
+check_ptsq = grep('Per tile sequence quality', readLines(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt')))
+if (length(check_ptsq) > 0) {
+    ## reads 1
+  ptsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per tile sequence quality')
+  ptsq_1$trim = 'before'
+
+  ## reads 2
+  ptsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per tile sequence quality')
+  ptsq_2$trim = 'after'
+
+  comb_ptsq = rbind(ptsq_1, ptsq_2)
+  comb_ptsq$trim = factor(levels = c('before', 'after'), comb_ptsq$trim)
+  comb_ptsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base)
+
+  # convert integers to charaters
+  comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)
+
+  p = ggplot(data = comb_ptsq, aes(x = Base, y = Tile, fill = Mean)) +
+    geom_raster() +
+    facet_grid(. ~ trim) +
+    xlab('Position in read (bp)') +
+    ylab('') +
+    theme(axis.text.x = element_text(angle=45))
+  ggplotly(p)
+} else {
+  print('No "per tile sequence quality" data')
+}
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/04_per_sequence_quality_score.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,35 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+
+# Per sequence quality score
+
+```{r 'Per sequence quality score', fig.width=10}
+## reads 1
+psqs_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence quality scores')
+psqs_1$trim = 'before'
+
+## reads 2
+psqs_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence quality scores')
+psqs_2$trim = 'after'
+
+comb_psqs = rbind(psqs_1, psqs_2)
+comb_psqs$trim = factor(levels = c('before', 'after'), comb_psqs$trim)
+
+p = ggplot(data = comb_psqs, aes(x = X.Quality, y = Count)) +
+  geom_line(color = 'red') +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality)) +
+  xlab('Mean Sequence Qaulity (Phred Score)') +
+  ylab('')
+ggplotly(p)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/05_per_base_sequence_content.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,43 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+
+# Per base sequence content
+
+```{r 'Per base sequence content', fig.width=10}
+## reads 1
+pbsc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence content')
+pbsc_1$id = 1:length(pbsc_1$X.Base)
+
+melt_pbsc_1 = melt(pbsc_1, id=c('X.Base', 'id'))
+melt_pbsc_1$trim = 'before'
+
+
+## reads 2
+pbsc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence content')
+pbsc_2$id = 1:length(pbsc_2$X.Base)
+
+melt_pbsc_2 = melt(pbsc_2, id=c('X.Base', 'id'))
+melt_pbsc_2$trim = 'after'
+
+comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2)
+comb_pbsc$trim = factor(levels = c('before', 'after'), comb_pbsc$trim)
+
+p = ggplot(data = comb_pbsc, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_pbsc$id), max(comb_pbsc$id)) +
+  ylim(0, 100) +
+  xlab('Position in read (bp)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/06_per_sequence_gc_content.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,33 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+# Per sequence GC content
+
+```{r 'Per sequence GC content', fig.width=10}
+## reads 1
+psGCc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence GC content')
+psGCc_1$trim = 'before'
+
+## reads 2
+psGCc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence GC content')
+psGCc_2$trim = 'after'
+
+comb_psGCc = rbind(psGCc_1, psGCc_2)
+comb_psGCc$trim = factor(levels = c('before', 'after'), comb_psGCc$trim)
+
+p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) +
+  geom_line(color = 'red') +
+  facet_grid(. ~ trim) +
+  xlab('Mean Sequence Qaulity (Phred Score)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/07_per_base_n_content.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,38 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+# Per base N content
+
+```{r 'Per base N content', fig.width=10}
+## reads 1
+pbNc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base N content')
+pbNc_1$id = 1:length(pbNc_1$X.Base)
+pbNc_1$trim = 'before'
+
+## reads 2
+pbNc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base N content')
+pbNc_2$id = 1:length(pbNc_2$X.Base)
+pbNc_2$trim = 'after'
+
+comb_pbNc = rbind(pbNc_1, pbNc_2)
+comb_pbNc$trim = factor(levels = c('before', 'after'), comb_pbNc$trim)
+
+p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) +
+  geom_line(color = 'red') +
+  scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) +
+  facet_grid(. ~ trim) +
+  ylim(0, 1) +
+  xlab('N-Count') +
+  ylab('') +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/08_sequence_length_distribution.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,37 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+# Sequence Length Distribution
+
+```{r 'Sequence Length Distribution', fig.width=10}
+## reads 1
+sld_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Length Distribution')
+sld_1$id = 1:length(sld_1$X.Length)
+sld_1$trim = 'before'
+
+## reads 2
+sld_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Length Distribution')
+sld_2$id = 1:length(sld_2$X.Length)
+sld_2$trim = 'after'
+
+comb_sld = rbind(sld_1, sld_2)
+comb_sld$trim = factor(levels = c('before', 'after'), comb_sld$trim)
+
+p = ggplot(data = comb_sld, aes(x = id, y = Count)) +
+  geom_line(color = 'red') +
+  scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) +
+  facet_grid(. ~ trim) +
+  xlab('Sequence Length (bp)') +
+  ylab('') +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/09_sequence_duplication_levels.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,45 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+
+# Sequence Duplication Levels
+
+```{r 'Sequence Duplication Levels', fig.width=10}
+## reads 1
+sdl_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
+names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
+sdl_1$id = 1:length(sdl_1$Duplication_Level)
+
+melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id'))
+melt_sdl_1$trim = 'before'
+
+
+## reads 2
+sdl_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
+names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
+sdl_2$id = 1:length(sdl_2$Duplication_Level)
+
+melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id'))
+melt_sdl_2$trim = 'after'
+
+comb_sdl = rbind(melt_sdl_1, melt_sdl_2)
+comb_sdl$trim = factor(levels = c('before', 'after'), comb_sdl$trim)
+
+p = ggplot(data = comb_sdl, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) +
+  facet_grid(. ~ trim) +
+  xlab('Sequence Duplication Level') +
+  ylab('') +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/10_adapter_content.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,41 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+# Adapter Content
+
+```{r 'Adapter Content', fig.width=10}
+## reads 1
+ac_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Adapter Content')
+ac_1$id = 1:length(ac_1$X.Position)
+
+melt_ac_1 = melt(ac_1, id=c('X.Position', 'id'))
+melt_ac_1$trim = 'before'
+
+## reads 2
+ac_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Adapter Content')
+ac_2$id = 1:length(ac_2$X.Position)
+
+melt_ac_2 = melt(ac_2, id=c('X.Position', 'id'))
+melt_ac_2$trim = 'after'
+
+comb_ac = rbind(melt_ac_1, melt_ac_2)
+comb_ac$trim = factor(levels = c('before', 'after'), comb_ac$trim)
+
+p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_ac$id), max(comb_ac$id)) +
+  ylim(0, 1) +
+  xlab('Position in read (bp)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/11_kmer_content.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,26 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+# Kmer Content {.tabset}
+
+## Before
+
+```{r 'Kmer Content (before)', fig.width=10}
+kc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Kmer Content')
+knitr::kable(kc_1)
+```
+
+## After
+```{r 'Kmer Content (after)', fig.width=10}
+kc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Kmer Content')
+knitr::kable(kc_2)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/_site.yml	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,37 @@
+name: "FastQC Website"
+output_dir: "../_site"
+navbar:
+    title: "FastQC"
+    type: inverse
+    left:
+        - text: "Home"
+          icon: fa-home
+          href: index.html
+        - text: "Evaluation Overview"
+          href: 01_evaluation_overview.html
+        - text: "Evaluation by data module"
+          menu:
+            - text: "Per Base Sequence Quality"
+              href: 02_per_base_sequence_quality.html
+            - text: "Per Tile Sequence Quality"
+              href: 03_per_tile_sequence_quality.html
+            - text: "Per Sequence Quality Score"
+              href: 04_per_sequence_quality_score.html
+            - text: "Per Base Sequence Content"
+              href: 05_per_base_sequence_content.html
+            - text: "Per Sequence GC Content"
+              href: 06_per_sequence_gc_content.html
+            - text: "Per Base N Content"
+              href: 07_per_base_n_content.html
+            - text: "Sequence Length Distribution"
+              href: 08_sequence_length_distribution.html
+            - text: "Sequence Duplication Levels"
+              href: 09_sequence_duplication_levels.html
+            - text: "Adapter Content"
+              href: 10_adapter_content.html
+            - text: "Kmer Content"
+              href: 11_kmer_content.html
+output:
+  html_document:
+    theme: cosmo
+    highlight: textmate
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqc_report.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,69 @@
+---
+title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+
+
+
+
+##
+
+##
+
+
+##
+
+
+##
+
+##
+
+##
+
+### Kmer Content {.tabset}
+
+#### Before
+
+```{r 'Kmer Content (before)', fig.width=10}
+kc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Kmer Content')
+knitr::kable(kc_1)
+```
+
+#### After
+```{r 'Kmer Content (after)', fig.width=10}
+kc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Kmer Content')
+knitr::kable(kc_2)
+```
+
+
+# Session Info
+
+```{r 'session info'}
+sessionInfo()
+```
+
+# References
+
+* Bioinformatics, Babraham (2014). FastQC.
+
+* Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6.
+
+* Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29.
+
+* Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy (2017). plotly: Create Interactive Web Graphics via 'plotly.js'. R package version 4.6.0. [Link]
+
+* Wickham, H. (2016). ggplot2: elegant graphics for data analysis. Springer. Chicago
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqc_site.xml	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,109 @@
+<tool name="Fastqc site" id='aurora_fastqc_site' version="2.1.0">
+    <description>
+        Evaluate short reads with FastQC software on a single or a paired of untrimmed and trimmed reads
+        files.
+    </description>
+    <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.3">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.1.0">r-stringr</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="1.4.2">r-reshape2</requirement>
+        <requirement type="package" version="4.5.6">r-plotly</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="0.11.5">fastqc</requirement>
+        <requirement type="package" version="0.9.10">xorg-libxrender</requirement>
+        <requirement type="package" version="1.2.2">xorg-libsm</requirement>
+        <requirement type="package" version="6.0">unzip</requirement>
+    </requirements>
+    <stdio>
+        <regex match="XXX" source="stderr" level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <command><![CDATA[
+
+
+        Rscript '${__tool_directory__}/fastqc_site_render.R'
+            -e $echo
+            -r $reads_1
+            -n '$reads_1.name'
+            -R $reads_2
+            -N '$reads_2.name'
+            -c $contaminants
+            -l $limits
+
+		    -o $report
+		    -d $report.files_path
+		    -s $sink_message
+
+		    -t '${__tool_directory__}'
+
+    ]]></command>
+    <inputs>
+        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_1" type="data" optional="false"
+               label="Short reads before trimming"
+               help="Short reads data from history. This could be reads before trimming."/>
+        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_2" type="data" label="Short reads after trimming"
+               help="Short reads data from history. This could be reads after trimming."/>
+        <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"
+               help="Specifies a non-default file which contains the list of adapter sequences which will be explicitly                      searched against the library. The file must contain sets of named adapters                      in the form name[tab]sequence.  Lines prefixed with a hash will be ignored."/>
+        <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file"
+               help="Specifies a non-default file which contains a set of criteria                     which will be used to determine the warn/error limits for the                     various modules.  This file can also be used to selectively                     remove some modules from the output all together.  The format                     needs to mirror the default limits.txt file found in the                     Configuration folder."/>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Display analysis code in report?"/>
+    </inputs>
+    <outputs>
+        <data format="html" name="report" label="fastqc site"/>
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
+    </outputs>
+    <citations>
+        <citation type="bibtex"><![CDATA[
+            @misc{bioinformatics2014fastqc,
+            title={FastQC},
+            author={Bioinformatics, Babraham},
+            year={2014}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @misc{plotly2017,
+            title = {plotly: Create Interactive Web Graphics via 'plotly.js'},
+            author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and
+            Marianne Corvellec and Pedro Despouy},
+            year = {2017},
+            note = {R package version 4.6.0},
+            url = {https://CRAN.R-project.org/package=plotly},
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @Book{ggplot22016,
+            author = {Hadley Wickham},
+            title = {ggplot2: Elegant Graphics for Data Analysis},
+            publisher = {Springer-Verlag New York},
+            year = {2009},
+            isbn = {978-0-387-98140-6},
+            url = {http://ggplot2.org},
+            }
+        ]]></citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqc_site_render.R	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,69 @@
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+
+library(getopt)
+library(rmarkdown)
+library(htmltools)
+library(plyr)
+library(dplyr)
+library(stringr)
+library(DT)
+library(reshape2)
+library(plotly)
+options(stringsAsFactors = FALSE)
+
+# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>%
+#   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
+
+
+# get arguments into R
+spec_matrix = as.matrix(
+  data.frame(stringsAsFactors=FALSE,
+              long_flags = c("X_e", "X_r", "X_n", "X_R", "X_N", "X_c", "X_l",
+                             "X_o", "X_d", "X_s", "X_t"),
+             short_flags = c("e", "r", "n", "R", "N", "c", "l", "o", "d", "s",
+                             "t"),
+     argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
+         data_type_flags = c("character", "character", "character", "character",
+                             "character", "character", "character",
+                             "character", "character", "character", "character")
+  )
+)
+opt = getopt(spec_matrix)
+
+# using passed arguments in R to define system environment variables
+do.call(Sys.setenv, opt[-1])
+
+# create the output associated directory to store all outputs
+dir.create(opt$X_d, recursive = TRUE)
+
+#-----------------render site--------------
+# copy site generating materials into folder "_site" within the output associated directory
+file.copy(opt$X_t, opt$X_d, recursive = TRUE)
+# render site to the output associated directory
+render_site(input = paste0(opt$X_d, '/aurora_fastqc_site'))
+# remove site generating materials from output associated directory
+unlink(paste0(opt$X_d, '/aurora_fastqc_site'), recursive = TRUE)
+# move _site/* into output associated directory
+move_cmd = paste0('mv ', opt$X_d, '/_site/* ', opt$X_d)
+system(move_cmd)
+#------------------------------------------
+
+#-----link index.html to output-----
+cp_index = paste0('cp ', opt$X_d, '/index.html ', opt$X_o)
+system(cp_index)
+#-----------------------------------
+
+#==============the end==============
+
+
+
+# file.copy(paste0(opt$X_d, '/index.html'), opt$X_o, recursive = TRUE)
+
+##--------end of code rendering .Rmd templates----------------
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/index.Rmd	Tue Feb 27 10:37:12 2018 -0500
@@ -0,0 +1,22 @@
+---
+title: "FastQC Report"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = TRUE, error = TRUE)
+```
+
+
+
+## References
+
+* Bioinformatics, Babraham (2014). FastQC.
+
+* Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6.
+
+* Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29.
+
+* Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy (2017). plotly: Create Interactive Web Graphics via 'plotly.js'. R package version 4.6.0. [Link]
+
+* Wickham, H. (2016). ggplot2: elegant graphics for data analysis. Springer. Chicago