Repository 'rmarkdown_fastqc_site'
hg clone https://toolshed.g2.bx.psu.edu/repos/mingchen0919/rmarkdown_fastqc_site

Changeset 11:507eec497730 (2017-11-07)
Previous changeset 10:600c39b11913 (2017-08-15) Next changeset 12:68ea2ebbf866 (2017-11-09)
Commit message:
update fastqc site
modified:
01_evaluation_overview.Rmd
_site.yml
fastqc_site.xml
fastqc_site_render.R
index.Rmd
added:
02_per_base_sequence_quality.Rmd
03_per_tile_sequence_quality.Rmd
04_per_sequence_quality_score.Rmd
05_per_base_sequence_content.Rmd
06_per_sequence_gc_content.Rmd
07_per_base_n_content.Rmd
08_sequence_length_distribution.Rmd
09_sequence_duplication_levels.Rmd
10_adapter_content.Rmd
11_kmer_content.Rmd
removed:
02_fastqc_original_reports.Rmd
1_per_base_quality_scores.Rmd
2_per_base_N_content.Rmd
3_per_sequence_quality_scores.Rmd
4_per_sequence_GC_content.Rmd
5_per_base_sequence_content.Rmd
b
diff -r 600c39b11913 -r 507eec497730 01_evaluation_overview.Rmd
--- a/01_evaluation_overview.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ b/01_evaluation_overview.Rmd Tue Nov 07 16:52:24 2017 -0500
[
b'@@ -1,123 +1,124 @@\n ---\n-title: "Evaluation Overview"\n-output: html_document\n+title: \'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)\'\n+output:\n+    html_document:\n+      number_sections: true\n+      toc: true\n+      theme: cosmo\n+      highlight: tango\n ---\n \n ```{r setup, include=FALSE, warning=FALSE, message=FALSE}\n-knitr::opts_chunk$set(echo = ECHO)\n-```\n-\n-```{bash \'copy data from datasets directory to working directory\', echo=FALSE}\n-# Copy uploaded data to the working directory\n-for f in $(echo READS | sed "s/,/ /g")\n-do\n-    cp $f ./\n-done\n-```\n-\n-```{bash \'run fastqc\', echo=FALSE}\n-# run fastqc and place outputs into the report directory\n-for r in $(ls *.dat)\n-do\n-    fastqc -o REPORT_OUTPUT_DIR $r > /dev/null 2>&1\n-done\n-```\n-\n-```{bash \'parse fastqc results\', echo=FALSE}\n-##==== copy fastqc generated zip files from report output directory to job work directory ==\n-cp -r REPORT_OUTPUT_DIR/*zip ./\n-\n-# create a file to store data file paths\n-echo "sample_id,file_path" > PWF_file_paths.txt # Pass, Warning, Fail\n-echo "sample_id,file_path" > PBQS_file_paths.txt # Per Base Quality Score\n-echo "sample_id,file_path" > PSQS_file_paths.txt # Per Sequence Quality Score\n-echo "sample_id,file_path" > PSGC_file_paths.txt # Per Sequence GC Content\n-echo "sample_id,file_path" > PBSC_file_paths.txt # Per Base Sequence Content\n-echo "sample_id,file_path" > PBNC_file_paths.txt # Per Base N Content\n-echo "sample_id,file_path" > SDL_file_paths.txt # Sequence Duplication Level\n-echo "sample_id,file_path" > SLD_file_paths.txt # Sequence Length Distribution\n-echo "sample_id,file_path" > KMC_file_paths.txt # Kmer Content\n-\n-for i in $(ls *.zip)\n-do\n-    BASE=$(echo $i | sed \'s/\\(.*\\)\\.zip/\\1/g\')\n-    echo $BASE\n-    unzip ${BASE}.zip > /dev/null 2>&1\n-    \n-    ##====== pass,warning,fail (WSF) =============\n-    awk \'/^>>/ {print}\' "$BASE"/fastqc_data.txt | 
grep -v \'END_MODULE\' | sed \'s/>>//\' > "$BASE"-PWF.txt\n-    echo "${BASE},${BASE}-PWF.txt" >> PWF_file_paths.txt\n-\n-    ##====== per base quality scores (PBQS) ======\n-    awk \'/^>>Per base sequence quality/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PBQS.txt\n-    echo "${BASE},${BASE}-PBQS.txt" >> PBQS_file_paths.txt\n-\n-    ##====== per sequence quality scores (PSQS)\n-    awk \'/^>>Per sequence quality scores/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PSQS.txt\n-    echo "${BASE},${BASE}-PSQS.txt" >> PSQS_file_paths.txt\n-\n-    ##====== Per sequence GC content (PSGC)\n-    awk \'/^>>Per sequence GC content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PSGC.txt\n-    echo "${BASE},${BASE}-PSGC.txt" >> PSGC_file_paths.txt\n-    \n-    ##====== Per Base Sequence Content (PBSC)\n-    awk \'/^>>Per base sequence content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PBSC.txt\n-    echo "${BASE},${BASE}-PBSC.txt" >> PBSC_file_paths.txt\n-    \n-    ##====== Per Base N Content (PBNC)\n-    awk \'/^>>Per base N content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PBNC.txt\n-    echo "${BASE},${BASE}-PBNC.txt" >> PBNC_file_paths.txt\n-    \n-    ##====== Sequence Duplication Level (SDL)\n-    awk \'/^>>Sequence Duplication Levels/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-SDL.txt\n-    echo "${BASE},${BASE}-SDL.txt" >> SDL_file_paths.txt\n-    \n-    ##====== Sequence Length Distribution (SLD)\n-    awk \'/^>>Sequence Length Distribution/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-SLD.txt\n-    echo "${BASE},${BASE}-SLD.txt" >> SLD_file_paths.txt\n-    \n-    ##====== Kmer Content ============\n-    awk \'/^>>Kmer Content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-KMC.txt\n-    echo 
"${BASE},${BASE}-KMC.txt" >> KMC_file_paths.txt\n-    \n-done\n+knitr::opts_chunk$set(\n+  echo = ECHO,\n+  error = TRUE\n+)\n ```\n \n \n-## Evaluat'..b'  stop("No pre-trimming reads provided!")\n+} else {\n+  ## run fastqc evaluation\n+  fastqc_command = paste0(\'fastqc \') %>% \n+  (function(x) {\n+    ifelse(\'CONTAMINANTS\' != \'None\', paste0(x, \'-c CONTAMINANTS \'), x)\n+  }) %>% \n+  (function(x) {\n+    ifelse(\'LIMITS\' != \'None\', paste0(x, \'-l LIMITS \'), x)\n+  }) %>% \n+  (function(x) {\n+    paste0(x, \'-o REPORT_DIR \')\n+  })\n+  fastqc_command_reads_1 = paste0(fastqc_command, \'READS_1 > /dev/null 2>&1\')\n+  system(fastqc_command_reads_1, intern = TRUE)\n+  \n+  # Original html report\n+  reads_1_base = tail(strsplit(\'READS_1\', \'/\')[[1]], 1)\n+  original_html = tags$a(href=paste0(reads_1_base, \'_fastqc.html\'), paste0(\'HTML report: \', opt$name_1))\n+  \n+  unzip(paste0(\'REPORT_DIR/\', reads_1_base, \'_fastqc.zip\'), exdir = \'REPORT_DIR\')\n+  reads_1_unzip = paste0(\'REPORT_DIR/\', reads_1_base, \'_fastqc/\')\n+  # fastqc_data.txt\n+  file.copy(paste0(reads_1_unzip, \'fastqc_data.txt\'), \'REPORT_DIR/reads_1_fastqc_data.txt\')\n+  fastqc_data = tags$a(href=\'reads_1_fastqc_data.txt\', paste0(\'fastqc_data.txt: \', opt$name_1))\n+  # summary.txt\n+  file.copy(paste0(reads_1_unzip, \'summary.txt\'), \'REPORT_DIR/reads_1_summary.txt\')\n+  summary_data = tags$a(href=\'reads_1_summary.txt\', paste0(\'summary.txt: \', opt$name_1))\n+  \n+  tags$ul(\n+    tags$li(original_html),\n+    tags$li(fastqc_data),\n+    tags$li(summary_data)\n+  )\n }\n ```\n \n \n+## Evaluation of reads after trimming\n+\n ```{r}\n-my_icon = c(\'ok\', \'remove\', \'star\')\n-names(my_icon) = c(\'pass\', \'fail\', \'warn\')\n-evaluate_list = list()\n-for (i in colnames(PWF_df)[-1]) {\n-  evaluate_list[[i]] = formatter(\n-      "span", \n-      style = x ~ style("background-color" = ifelse(x ==\'pass\', \'#9CD027\', ifelse(x == \'fail\', \'#CC0000\', \'#FF4E00\')), 
\n-                        "color" = "white",\n-                        "width" = "50px",\n-                        "float" = "left",\n-                        "padding-right" = "5px")\n-    )\n+if (\'READS_2\' == \'None\') {\n+  stop("No pre-trimming reads provided!")\n+} else {\n+  ## run fastqc evaluation\n+  fastqc_command = paste0(\'fastqc \') %>% \n+  (function(x) {\n+    ifelse(\'CONTAMINANTS\' != \'None\', paste0(x, \'-c CONTAMINANTS \'), x)\n+  }) %>% \n+  (function(x) {\n+    ifelse(\'LIMITS\' != \'None\', paste0(x, \'-l LIMITS \'), x)\n+  }) %>% \n+  (function(x) {\n+    paste0(x, \'-o REPORT_DIR \')\n+  })\n+  fastqc_command_reads_2 = paste0(fastqc_command, \'READS_2 > /dev/null 2>&1\')\n+  system(fastqc_command_reads_2, intern = TRUE)\n+  \n+  # Original html report\n+  reads_2_base = tail(strsplit(\'READS_2\', \'/\')[[1]], 1)\n+  original_html = tags$a(href=paste0(reads_2_base, \'_fastqc.html\'), paste0(\'HTML report: \', opt$name_2))\n+  \n+  unzip(paste0(\'REPORT_DIR/\', reads_2_base, \'_fastqc.zip\'), exdir = \'REPORT_DIR\')\n+  reads_2_unzip = paste0(\'REPORT_DIR/\', reads_2_base, \'_fastqc/\')\n+  # fastqc_data.txt\n+  file.copy(paste0(reads_2_unzip, \'fastqc_data.txt\'), \'REPORT_DIR/reads_2_fastqc_data.txt\')\n+  fastqc_data = tags$a(href=\'reads_2_fastqc_data.txt\', paste0(\'fastqc_data.txt: \', opt$name_2))\n+  # summary.txt\n+  file.copy(paste0(reads_2_unzip, \'summary.txt\'), \'REPORT_DIR/reads_2_summary.txt\')\n+  summary_data = tags$a(href=\'reads_2_summary.txt\', paste0(\'summary.txt: \', opt$name_2))\n+  \n+  tags$ul(\n+    tags$li(original_html),\n+    tags$li(fastqc_data),\n+    tags$li(summary_data)\n+  )\n }\n+```\n \n-formattable(PWF_df, evaluate_list)\n+\n+\n+# Fastqc output visualization\n+\n+## Overview\n+\n+```{r}\n+reads_1_summary = read.csv(\'REPORT_DIR/reads_1_summary.txt\', header = FALSE, sep = \'\\t\')[, 2:1]\n+reads_2_summary = read.csv(\'REPORT_DIR/reads_2_summary.txt\', header = FALSE, sep = \'\\t\')[, 
1]\n+combined_summary = cbind(reads_1_summary, reads_2_summary)\n+names(combined_summary) = c(\'MODULE\', paste0(opt$name_1, \'(before)\'), paste0(opt$name_2, \'(after)\'))\n+combined_summary[combined_summary == \'FAIL\'] = \'FAIL (X)\'\n+combined_summary[combined_summary == \'WARN\'] = \'WARN (!)\'\n+knitr::kable(combined_summary)\n+```\n+\n+# Session Info\n+\n+```{r \'session info\'}\n+sessionInfo()\n ```\n\\ No newline at end of file\n'
b
diff -r 600c39b11913 -r 507eec497730 02_fastqc_original_reports.Rmd
--- a/02_fastqc_original_reports.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,20 +0,0 @@
----
-title: "FastQC original reports"
-output: html_document
----
-
-```{r 'FastQC original reports', include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-
-Below are links to ***Fastqc*** original html reports.
-
-```{r 'html report links'}
-html_report_list = list()
-html_files = list.files('REPORT_OUTPUT_DIR', pattern = '.*html')
-for (i in html_files) {
-  html_report_list[[i]] = tags$li(tags$a(href=i, i))
-}
-tags$ul(html_report_list)
-```
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 02_per_base_sequence_quality.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/02_per_base_sequence_quality.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -0,0 +1,47 @@
+---
+title: 'Per base sequence quality'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,
+  error = TRUE
+)
+```
+
+### Per base sequence quality
+
+```{r 'per base sequence quality', fig.width=10}
+## reads 1 (before trimming): mean quality per base position
+pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality')
+pbsq_1$id = seq_len(nrow(pbsq_1))  # numeric x position; seq_len() is safe when the module has 0 rows
+## keep only the 'Mean' series after melting on the X.Base/id columns
+melt_pbsq_1 = filter(melt(pbsq_1, id=c('X.Base', 'id')), variable == 'Mean')
+melt_pbsq_1$trim = 'before'
+
+
+## reads 2 (after trimming): same steps as for reads 1
+pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality')
+pbsq_2$id = seq_len(nrow(pbsq_2))
+
+melt_pbsq_2 = filter(melt(pbsq_2, id=c('X.Base', 'id')), variable == 'Mean')
+melt_pbsq_2$trim = 'after'
+## stack both samples; explicit levels put the 'before' facet ahead of 'after'
+comb_pbsq = rbind(melt_pbsq_1, melt_pbsq_2)
+comb_pbsq$trim = factor(comb_pbsq$trim, levels = c('before', 'after'))
+## NOTE(review): tick labels come from reads 2 only; assumes both files share the same position bins -- confirm
+p = ggplot(data = comb_pbsq) +
+  geom_line(mapping = aes(x = id, y = value, group = variable, color = variable)) +
+  scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
+  facet_grid(. ~ trim) +
+  ylim(0, max(comb_pbsq$value, na.rm = TRUE) + 5) +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+
+```
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 03_per_tile_sequence_quality.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/03_per_tile_sequence_quality.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -0,0 +1,48 @@
+---
+title: 'Per Tile Sequence Quality'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,
+  error = TRUE
+)
+```
+
+### Per tile sequence quality
+
+```{r 'per tile sequence quality', fig.width=10}
+## Draw the per-tile heatmap only when BOTH FastQC data files contain the module (both are read below).
+has_ptsq = function(path) any(grepl('Per tile sequence quality', readLines(path), fixed = TRUE))
+if (has_ptsq('REPORT_DIR/reads_1_fastqc_data.txt') && has_ptsq('REPORT_DIR/reads_2_fastqc_data.txt')) {
+  ## reads 1 (before trimming)
+  ptsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per tile sequence quality')
+  ptsq_1$trim = 'before'
+  
+  ## reads 2 (after trimming)
+  ptsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per tile sequence quality')
+  ptsq_2$trim = 'after'
+  
+  comb_ptsq = rbind(ptsq_1, ptsq_2)
+  comb_ptsq$trim = factor(comb_ptsq$trim, levels = c('before', 'after'))
+  comb_ptsq$Base = factor(comb_ptsq$Base, levels = unique(comb_ptsq$Base))  # keep order of first appearance
+  
+  # store the tile ids as character strings for the y axis
+  comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)
+  
+  p = ggplot(data = comb_ptsq, aes(x = Base, y = Tile, fill = Mean)) +
+    geom_raster() +
+    facet_grid(. ~ trim) +
+    xlab('Position in read (bp)') +
+    ylab('') +
+    theme(axis.text.x = element_text(angle=45))
+  ggplotly(p)
+} else {
+  print('No "per tile sequence quality" data')
+}
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 04_per_sequence_quality_score.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/04_per_sequence_quality_score.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -0,0 +1,39 @@
+---
+title: 'Per sequence quality score'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,
+  error = TRUE
+)
+```
+
+### Per sequence quality score
+
+```{r 'Per sequence quality score', fig.width=10}
+## reads 1 (before trimming): read counts per mean quality score
+psqs_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per sequence quality scores')
+psqs_1$trim = 'before'
+
+## reads 2 (after trimming)
+psqs_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per sequence quality scores')
+psqs_2$trim = 'after'
+## stack both samples; explicit levels put the 'before' facet ahead of 'after'
+comb_psqs = rbind(psqs_1, psqs_2)
+comb_psqs$trim = factor(comb_psqs$trim, levels = c('before', 'after'))
+## one panel per sample; the x range spans the scores observed in either sample
+p = ggplot(data = comb_psqs, aes(x = X.Quality, y = Count)) +
+  geom_line(color = 'red') +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality)) +
+  xlab('Mean Sequence Quality (Phred Score)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 05_per_base_sequence_content.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/05_per_base_sequence_content.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -0,0 +1,47 @@
+---
+title: 'Per base sequence content'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,
+  error = TRUE
+)
+```
+
+### Per base sequence content
+
+```{r 'Per base sequence content', fig.width=10}
+## reads 1 (before trimming): per-position sequence content columns
+pbsc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence content')
+pbsc_1$id = seq_len(nrow(pbsc_1))  # numeric x position; seq_len() is safe when the module has 0 rows
+## melt every non-id column into variable/value pairs (one line per column in the plot)
+melt_pbsc_1 = melt(pbsc_1, id=c('X.Base', 'id'))
+melt_pbsc_1$trim = 'before'
+
+
+## reads 2 (after trimming): same steps as for reads 1
+pbsc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence content')
+pbsc_2$id = seq_len(nrow(pbsc_2))
+
+melt_pbsc_2 = melt(pbsc_2, id=c('X.Base', 'id'))
+melt_pbsc_2$trim = 'after'
+## stack both samples; explicit levels put the 'before' facet ahead of 'after'
+comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2)
+comb_pbsc$trim = factor(comb_pbsc$trim, levels = c('before', 'after'))
+
+p = ggplot(data = comb_pbsc, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_pbsc$id), max(comb_pbsc$id)) +
+  ylim(0, 100) +
+  xlab('Position in read (bp)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 06_per_sequence_gc_content.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/06_per_sequence_gc_content.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -0,0 +1,38 @@
+---
+title: 'Per sequence GC content'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,
+  error = TRUE
+)
+```
+
+### Per sequence GC content
+
+```{r 'Per sequence GC content', fig.width=10}
+## reads 1 (before trimming): read counts per GC content value
+psGCc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per sequence GC content')
+psGCc_1$trim = 'before'
+
+## reads 2 (after trimming)
+psGCc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per sequence GC content')
+psGCc_2$trim = 'after'
+## stack both samples; explicit levels put the 'before' facet ahead of 'after'
+comb_psGCc = rbind(psGCc_1, psGCc_2)
+comb_psGCc$trim = factor(comb_psGCc$trim, levels = c('before', 'after'))
+## the x axis is the X.GC.Content column, so it is labelled as GC content (not as a Phred score)
+p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) +
+  geom_line(color = 'red') +
+  facet_grid(. ~ trim) +
+  xlab('Mean GC content (%)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 07_per_base_n_content.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/07_per_base_n_content.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -0,0 +1,43 @@
+---
+title: 'Per base N content'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,
+  error = TRUE
+)
+```
+
+### Per base N content
+
+```{r 'Per base N content', fig.width=10}
+## reads 1 (before trimming): N content per base position
+pbNc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base N content')
+pbNc_1$id = seq_len(nrow(pbNc_1))  # numeric x position; seq_len() is safe when the module has 0 rows
+pbNc_1$trim = 'before'
+
+## reads 2 (after trimming)
+pbNc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base N content')
+pbNc_2$id = seq_len(nrow(pbNc_2))
+pbNc_2$trim = 'after'
+## stack both samples; explicit levels put the 'before' facet ahead of 'after'
+comb_pbNc = rbind(pbNc_1, pbNc_2)
+comb_pbNc$trim = factor(comb_pbNc$trim, levels = c('before', 'after'))
+## NOTE(review): if N.Count is a percentage, values above 1 are dropped by ylim(0, 1) -- confirm the unit
+p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) +
+  geom_line(color = 'red') +
+  scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) +
+  facet_grid(. ~ trim) +
+  ylim(0, 1) +
+  xlab('Position in read (bp)') +
+  ylab('N-Count') +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 08_sequence_length_distribution.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/08_sequence_length_distribution.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -0,0 +1,42 @@
+---
+title: 'Sequence Length Distribution'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,
+  error = TRUE
+)
+```
+
+### Sequence Length Distribution
+
+```{r 'Sequence Length Distribution', fig.width=10}
+## reads 1 (before trimming): read counts per sequence length
+sld_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Sequence Length Distribution')
+sld_1$id = seq_len(nrow(sld_1))  # numeric x position; seq_len() is safe when the module has 0 rows
+sld_1$trim = 'before'
+
+## reads 2 (after trimming)
+sld_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Sequence Length Distribution')
+sld_2$id = seq_len(nrow(sld_2))
+sld_2$trim = 'after'
+## stack both samples; explicit levels put the 'before' facet ahead of 'after'
+comb_sld = rbind(sld_1, sld_2)
+comb_sld$trim = factor(comb_sld$trim, levels = c('before', 'after'))
+## NOTE(review): tick labels come from reads 2 only; assumes both files list the same lengths -- confirm
+p = ggplot(data = comb_sld, aes(x = id, y = Count)) +
+  geom_line(color = 'red') +
+  scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) +
+  facet_grid(. ~ trim) +
+  xlab('Sequence Length (bp)') +
+  ylab('') +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 09_sequence_duplication_levels.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/09_sequence_duplication_levels.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -0,0 +1,49 @@
+---
+title: 'Sequence Duplication Levels'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,
+  error = TRUE
+)
+```
+
+### Sequence Duplication Levels
+
+```{r 'Sequence Duplication Levels', fig.width=10}
+## reads 1 (before trimming): read without a header row, '#' lines are treated as comments
+sdl_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
+names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
+sdl_1$id = seq_len(nrow(sdl_1))  # numeric x position; seq_len() is safe when the module has 0 rows
+## melt the two percentage columns into variable/value pairs (one line each in the plot)
+melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id'))
+melt_sdl_1$trim = 'before'
+
+
+## reads 2 (after trimming): same steps as for reads 1
+sdl_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
+names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
+sdl_2$id = seq_len(nrow(sdl_2))
+
+melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id'))
+melt_sdl_2$trim = 'after'
+## stack both samples; explicit levels put the 'before' facet ahead of 'after'
+comb_sdl = rbind(melt_sdl_1, melt_sdl_2)
+comb_sdl$trim = factor(comb_sdl$trim, levels = c('before', 'after'))
+
+p = ggplot(data = comb_sdl, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) +
+  facet_grid(. ~ trim) +
+  xlab('Sequence Duplication Level') +
+  ylab('') +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 10_adapter_content.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/10_adapter_content.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -0,0 +1,46 @@
+---
+title: 'Adapter Content'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,
+  error = TRUE
+)
+```
+
+### Adapter Content
+
+```{r 'Adapter Content', fig.width=10}
+## reads 1 (before trimming): adapter content columns per read position
+ac_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Adapter Content')
+ac_1$id = seq_len(nrow(ac_1))  # numeric x position; seq_len() is safe when the module has 0 rows
+## melt every non-id column into variable/value pairs (one line per column in the plot)
+melt_ac_1 = melt(ac_1, id=c('X.Position', 'id'))
+melt_ac_1$trim = 'before'
+
+## reads 2 (after trimming): same steps as for reads 1
+ac_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Adapter Content')
+ac_2$id = seq_len(nrow(ac_2))
+
+melt_ac_2 = melt(ac_2, id=c('X.Position', 'id'))
+melt_ac_2$trim = 'after'
+## stack both samples; explicit levels put the 'before' facet ahead of 'after'
+comb_ac = rbind(melt_ac_1, melt_ac_2)
+comb_ac$trim = factor(comb_ac$trim, levels = c('before', 'after'))
+## NOTE(review): if the adapter values are percentages, values above 1 are dropped by ylim(0, 1) -- confirm the unit
+p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_ac$id), max(comb_ac$id)) +
+  ylim(0, 1) +
+  xlab('Position in read (bp)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 11_kmer_content.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/11_kmer_content.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -0,0 +1,31 @@
+---
+title: 'Kmer Content'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,
+  error = TRUE
+)
+```
+
+### Kmer Content {.tabset}
+
+#### Before
+
+```{r 'Kmer Content (before)', fig.width=10}
+kmer_before = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Kmer Content')  # reads 1 Kmer Content module
+knitr::kable(x = kmer_before)  # render the extracted module as a table
+```
+
+#### After
+```{r 'Kmer Content (after)', fig.width=10}
+kmer_after = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Kmer Content')  # reads 2 Kmer Content module
+knitr::kable(x = kmer_after)  # render the extracted module as a table
+```
\ No newline at end of file
b
diff -r 600c39b11913 -r 507eec497730 1_per_base_quality_scores.Rmd
--- a/1_per_base_quality_scores.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,62 +0,0 @@
----
-title: "Per Base Quality Scores"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-
-## Per Base Quality Scores
-
-```{r}
-PBQS_df = data.frame()
-PBQS_file_paths = read.csv('PBQS_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PBQS_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PBQS_file_paths[i,2])
-  file_path = PBQS_file_paths[i,2]
-  pbqs_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
-    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
-           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
-  (function (df) {
-    df1 = select(df, -Base2)
-    df2 = select(df, -Base1) %>% filter(Base2 != '')
-    colnames(df1) = c(colnames(df1)[1:7], 'Base')
-    colnames(df2) = c(colnames(df2)[1:7], 'Base')
-    res = rbind(df1, df2) %>% arrange(Base)
-    return(res)
-  })
-  pbqs_df$sample_id = rep(PBQS_file_paths[i,1], nrow(pbqs_df))
-  PBQS_df = rbind(PBQS_df, pbqs_df)
-}
-```
-
-
-```{r}
-# datatable(PBQS_df)
-max_phred = max(PBQS_df$Mean) + 10
-hchart(PBQS_df, "line", hcaes(x = Base, y = Mean, group = sample_id)) %>%
-  hc_title(
-    text = "Per Base Quality Score"
-  ) %>%
-  hc_yAxis(
-    title = list(text = "Mean Base Quality Score"),
-    min = 0,
-    max = max_phred,
-    plotLines = list(
-      list(label = list(text = "Phred Score = 27"),
-           width = 2,
-           dashStyle = "dash",
-           color = "green",
-           value = 27),
-      list(label = list(text = "Phred Score = 20"),
-           width = 2,
-           color = "red",
-           value = 20)
-    )
-  ) %>% 
-  hc_exporting(enabled = TRUE)
-```
b
diff -r 600c39b11913 -r 507eec497730 2_per_base_N_content.Rmd
--- a/2_per_base_N_content.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,58 +0,0 @@
----
-title: "Per Base N Content"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Base N Content
-
-```{r}
-PBNC_df = data.frame()
-PBNC_file_paths = read.csv('PBNC_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PBNC_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PBNC_file_paths[i,2])
-  file_path = PBNC_file_paths[i,2]
-  pbnc_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
-    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
-           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
-  (function (df) {
-    df1 = select(df, -Base2)
-    df2 = select(df, -Base1) %>% filter(Base2 != '')
-    colnames(df1) = c(colnames(df1)[1:2], 'Base')
-    colnames(df2) = c(colnames(df2)[1:2], 'Base')
-    res = rbind(df1, df2) %>% arrange(Base)
-    return(res)
-  })
-  pbnc_df$sample_id = rep(PBNC_file_paths[i,1], nrow(pbnc_df))
-  PBNC_df = rbind(PBNC_df, pbnc_df)
-}
-```
-
-
-```{r}
-PBNC_df$N.Count = PBNC_df$N.Count * 100
-max_phred = max(PBNC_df$N.Count) + 5
-hchart(PBNC_df, "line", hcaes(x = as.character(Base), y = N.Count, group = sample_id)) %>%
-  hc_title(
-    text = "Per Base N Content"
-  ) %>%
-  hc_xAxis(
-    title = list(text = "Base Position")
-  ) %>%
-  hc_yAxis(
-    title = list(text = "N %"),
-    plotLines = list(
-      list(label = list(text = "N = 5%"),
-           width = 2,
-           dashStyle = "dash",
-           color = "red",
-           value = 5)
-    )
-  ) %>% 
-  hc_exporting(enabled = TRUE)
-```
b
diff -r 600c39b11913 -r 507eec497730 3_per_sequence_quality_scores.Rmd
--- a/3_per_sequence_quality_scores.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,50 +0,0 @@
----
-title: "Per Sequence Quality Scores"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Sequence Quality Scores
-
-```{r}
-PSQS_df = data.frame()
-PSQS_file_paths = read.csv('PSQS_file_paths.txt', 
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PSQS_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PSQS_file_paths[i,2])
-  file_path = PSQS_file_paths[i,2]
-  psqs_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) 
-  psqs_df$sample_id = rep(PSQS_file_paths[i,1], nrow(psqs_df))
-  PSQS_df = rbind(PSQS_df, psqs_df)
-}
-```
-
-
-```{r}
-max_phred = max(PSQS_df$X.Quality) + 5
-hchart(PSQS_df, "line", hcaes(x = X.Quality, y = Count, group = sample_id)) %>%
-  hc_title(
-    text = "Per Sequence Quality Score"
-  ) %>%
-  hc_xAxis(
-    title = list(text = "Mean Sequence Quality Score"),
-    min = 0,
-    max = max_phred,
-    plotLines = list(
-      list(label = list(text = "Phred Score = 27"),
-           width = 2,
-           dashStyle = "dash",
-           color = "green",
-           value = 27),
-      list(label = list(text = "Phred Score = 20"),
-           width = 2,
-           color = "red",
-           value = 20)
-    )
-  ) %>% 
-  hc_exporting(enabled = TRUE)
-```
b
diff -r 600c39b11913 -r 507eec497730 4_per_sequence_GC_content.Rmd
--- a/4_per_sequence_GC_content.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,38 +0,0 @@
----
-title: "Per Sequence GC Content"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Sequence GC Content
-
-
-```{r}
-PSGC_df = data.frame()
-PSGC_file_paths = read.csv('PSGC_file_paths.txt', 
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PSGC_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PSGC_file_paths[i,2])
-  file_path = PSGC_file_paths[i,2]
-  psgc_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) 
-  psgc_df$sample_id = rep(PSGC_file_paths[i,1], nrow(psgc_df))
-  PSGC_df = rbind(PSGC_df, psgc_df)
-}
-```
-
-
-```{r}
-max_phred = max(PSGC_df$Count) + 5
-hchart(PSGC_df, "line", hcaes(x = X.GC.Content, y = Count, group = sample_id)) %>%
-  hc_title(
-    text = "Per Sequence GC Content"
-  ) %>%
-  hc_xAxis(
-    title = list(text = "% GC")
-  ) %>%
-  hc_exporting(enabled = TRUE)
-```
b
diff -r 600c39b11913 -r 507eec497730 5_per_base_sequence_content.Rmd
--- a/5_per_base_sequence_content.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,45 +0,0 @@
----
-title: "Per Base Sequence Content"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Base Sequence Content
-
-```{r}
-PBSC_df = data.frame()
-PBSC_file_paths = read.csv('PBSC_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PBSC_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PBSC_file_paths[i,2])
-  file_path = PBSC_file_paths[i,2]
-  pbsc_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
-    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
-           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
-  (function (df) {
-    df1 = select(df, -Base2)
-    df2 = select(df, -Base1) %>% filter(Base2 != '')
-    colnames(df1) = c(colnames(df1)[1:5], 'Base')
-    colnames(df2) = c(colnames(df2)[1:5], 'Base')
-    res = rbind(df1, df2) %>% arrange(Base)
-    return(res)
-  })
-  pbsc_df$sample_id = rep(PBSC_file_paths[i,1], nrow(pbsc_df))
-  PBSC_df = rbind(PBSC_df, pbsc_df)
-}
-```
-
-
-```{r out.width="100%"}
-PBSC_df_2 = select(PBSC_df, -X.Base) %>%
-  melt(id = c('Base', 'sample_id'), value.name = 'base_percentage')
-p = ggplot(data = PBSC_df_2, aes(x = Base, y = base_percentage, group = variable, color = variable)) +
-  geom_line() +
-  facet_wrap(~ sample_id)
-ggplotly(p)
-```
-
b
diff -r 600c39b11913 -r 507eec497730 _site.yml
--- a/_site.yml Tue Aug 15 15:50:21 2017 -0400
+++ b/_site.yml Tue Nov 07 16:52:24 2017 -0500
b
@@ -8,21 +8,29 @@
           icon: fa-home
           href: index.html
         - text: "Evaluation Overview"
-          href: 01_evaluation_overview.html
-        - text: "Evaluation Items"
+          href: x01_evaluation_overview.html
+        - text: "Evaluation by data module"
           menu:
-            - text: "Per Base Quality Scores"
-              href: 1_per_base_quality_scores.html
+            - text: "Per Base Sequence Quality"
+              href: x02_per_base_sequence_quality.html
+            - text: "Per Tile Sequence Quality"
+              href: x03_per_tile_sequence_quality.html
+            - text: "Per Sequence Quality Score"
+              href: x04_per_sequence_quality_score.html
+            - text: "Per Base Sequence Content"
+              href: x05_per_base_sequence_content.html
+            - text: "Per Sequence GC Content"
+              href: x06_per_sequence_gc_content.html
             - text: "Per Base N Content"
-              href: 2_per_base_N_content.html
-            - text: "Per Sequence Quality Scores"
-              href: 3_per_sequence_quality_scores.html
-            - text: "Per Sequence GC Content"
-              href: 4_per_sequence_GC_content.html
-            - text: "Per Base Sequence Content"
-              href: 5_per_base_sequence_content.html
-        - text: "Original FastQC Reports"
-          href: 02_fastqc_original_reports.html
+              href: x07_per_base_n_content.html
+            - text: "Sequence Length Distribution"
+              href: x08_sequence_length_distribution.html
+            - text: "Sequence Duplication Levels"
+              href: x09_sequence_duplication_levels.html
+            - text: "Adapter Content"
+              href: x10_adapter_content.html
+            - text: "Kmer Content"
+              href: x11_kmer_content.html
 output:
   html_document:
     theme: cosmo
b
diff -r 600c39b11913 -r 507eec497730 fastqc_site.xml
--- a/fastqc_site.xml Tue Aug 15 15:50:21 2017 -0400
+++ b/fastqc_site.xml Tue Nov 07 16:52:24 2017 -0500
[
b'@@ -1,9 +1,9 @@\n-<tool id="fastqc_site" name="Fastqc Site" version="1.0.0">\n+<tool id="fastqc_site" name="Fastqc Site" version="2.0.0">\n     <requirements>\n         <requirement type="package" version="1.15.0.6-0">pandoc</requirement>\n         <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>\n         <requirement type="package" version="1.20.0">r-getopt</requirement>\n-        <requirement type="package" version="1.2">r-rmarkdown</requirement>\n+        <requirement type="package" version="1.3">r-rmarkdown</requirement>\n         <requirement type="package" version="1.8.4">r-plyr</requirement>\n         <requirement type="package" version="1.1.0">r-stringr</requirement>\n         <requirement type="package" version="0.5.0">r-highcharter</requirement>\n@@ -14,22 +14,12 @@\n         <requirement type="package" version="0.3.5">r-htmltools</requirement>\n         <requirement type="package" version="0.11.5">fastqc</requirement>\n     </requirements>\n-    <description>\n-        Implements FastQC analysis and display results in R Markdown website.\n-    </description>\n     <stdio>\n-        <regex match="Execution halted"\n-               source="both"\n-               level="fatal"\n-               description="Execution halted." />\n-        <regex match="Error in"\n-               source="both"\n-               level="fatal"\n-               description="An undefined error occured, please check your intput carefully and contact your administrator." />\n-        <regex match="Fatal error"\n-               source="both"\n-               level="fatal"\n-               description="An undefined error occured, please check your intput carefully and contact your administrator." />\n+        <!--redirecting stderr to a file. 
"XXX" is used to match with nothing so that tool running won\'t be interrupted during testing-->\n+        <regex match="XXX"\n+               source="stderr"\n+               level="warning"\n+               description="Check the warnings_and_errors.txt file for more details."/>\n     </stdio>\n     <command>\n         <![CDATA[\n@@ -37,36 +27,65 @@\n         Rscript \'${__tool_directory__}/fastqc_site_render.R\'\n \n             ## 1. input data\n-            -r $reads\n             -e $echo\n+            -r $reads_1\n+            -n \'$reads_1.name\'\n+            -R $reads_2\n+            -N \'$reads_2.name\'\n+            -c $contaminants\n+            -l $limits\n \n             ## 2. output report and report site directory\n-\t\t    -o $fastqc_site\n-\t\t    -d $fastqc_site.files_path\n+\t\t    -o $report\n+\t\t    -d $report.files_path\n+\t\t    -s $sink_message\n \n \t\t    ## 3. Rmd templates sitting in the tool directory\n \n-\t\t        ## _site.yml and index.Rmd template files\n-                -s \'${__tool_directory__}/_site.yml\'\n-                -i \'${__tool_directory__}/index.Rmd\'\n+\t\t    ## _site.yml and index.Rmd template files\n+            -S \'${__tool_directory__}/_site.yml\'\n+            -I \'${__tool_directory__}/index.Rmd\'\n \n-                ## other Rmd body template files\n-\t\t        -p  \'${__tool_directory__}/01_evaluation_overview.Rmd\'\n-\t\t        -a  \'${__tool_directory__}/02_fastqc_original_reports.Rmd\'\n-\t\t        -b  \'${__tool_directory__}/1_per_base_quality_scores.Rmd\'\n-\t\t        -c  \'${__tool_directory__}/2_per_base_N_content.Rmd\'\n-\t\t        -f  \'${__tool_directory__}/3_per_sequence_quality_scores.Rmd\'\n-\t\t        -g  \'${__tool_directory__}/4_per_sequence_GC_content.Rmd\'\n-\t\t        -h  \'${__tool_directory__}/5_per_base_sequence_content.Rmd\'\n+            ## other Rmd body template files\n+\t\t    -A \'${__tool_directory__}/01_evaluation_overview.Rmd\'\n+\t\t    -B 
\'${__tool_directory__}/02_per_base_sequence_quality.Rmd\'\n+\t\t    -C \'${__tool_directory__}/03_per_tile_sequence_quality.Rmd\'\n+\t\t    -D \'${__tool_directory__}/04_per_sequence_quality_score.Rmd\'\n+\t\t    -E \'${__tool_directory__}/05_per_base_sequence_content.Rmd\'\n+\t\t    -F \'${__tool_directory__}/06_per_sequence_gc_content.Rmd\'\n+\t\t    -G \'${__tool_directory__}/07_per_base_n_content.Rmd\'\n'..b't="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_2" type="data"\n+               label="Short reads after trimming"\n+               help="Short reads data from history. This could be reads after trimming."/>\n+        <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"\n+               help="Specifies a non-default file which contains the list of adapter sequences which will be explicitly\n+                     searched against the library. The file must contain sets of named adapters\n+                     in the form name[tab]sequence.  Lines prefixed with a hash will be ignored."/>\n+        <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file"\n+               help="Specifies a non-default file which contains a set of criteria\n+                    which will be used to determine the warn/error limits for the\n+                    various modules.  This file can also be used to selectively\n+                    remove some modules from the output all together.  
The format\n+                    needs to mirror the default limits.txt file found in the\n+                    Configuration folder."/>\n+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"\n+               label="Display analysis code in report?"/>\n     </inputs>\n     <outputs>\n-        <data format="html" name="fastqc_site" label="fastqc site" />\n+        <data format="html" name="report" label="fastqc site"/>\n+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>\n     </outputs>\n     <citations>\n         <citation type="bibtex">\n@@ -79,7 +98,8 @@\n         <citation type="bibtex">\n             @article{allaire2016rmarkdown,\n             title={rmarkdown: Dynamic Documents for R, 2016},\n-            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},\n+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff\n+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},\n             journal={R package version 0.9},\n             volume={6},\n             year={2016}\n@@ -97,31 +117,14 @@\n         <citation type="bibtex">\n             @misc{plotly2017,\n             title = {plotly: Create Interactive Web Graphics via \'plotly.js\'},\n-            author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy},\n+            author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and\n+            Marianne Corvellec and Pedro Despouy},\n             year = {2017},\n             note = {R package version 4.6.0},\n             url = {https://CRAN.R-project.org/package=plotly},\n             }\n         </citation>\n         <citation type="bibtex">\n-     
       @misc{highcharter2017,\n-            title = {highcharter: A Wrapper for the \'Highcharts\' Library},\n-            author = {Joshua Kunst},\n-            year = {2017},\n-            note = {R package version 0.5.0},\n-            url = {https://CRAN.R-project.org/package=highcharter},\n-            }\n-        </citation>\n-        <citation type="bibtex">\n-            @misc{formattable2016,\n-            title = {formattable: Create \'Formattable\' Data Structures},\n-            author = {Kun Ren and Kenton Russell},\n-            year = {2016},\n-            note = {R package version 0.2.0.1},\n-            url = {https://CRAN.R-project.org/package=formattable},\n-            }\n-        </citation>\n-        <citation>\n             @article{ewels2016multiqc,\n             title={MultiQC: summarize analysis results for multiple tools and samples in a single report},\n             author={Ewels, Philip and Magnusson, M{\\aa}ns and Lundin, Sverker and K{\\"a}ller, Max},\n'
b
diff -r 600c39b11913 -r 507eec497730 fastqc_site_render.R
--- a/fastqc_site_render.R Tue Aug 15 15:50:21 2017 -0400
+++ b/fastqc_site_render.R Tue Nov 07 16:52:24 2017 -0500
[
b'@@ -1,195 +1,283 @@\n-##======= Handle arguments from command line ========\n-# setup R error handline to go to stderr\n-options(show.error.messages=FALSE,\n-        error=function(){\n-          cat(geterrmessage(), file=stderr())\n-          quit("no", 1, F)\n-        })\n-\n-# we need that to not crash galaxy with an UTF8 error on German LC settings.\n-loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n-\n-# suppress warning\n-options(warn = -1)\n-\n-options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)\n-args = commandArgs(trailingOnly=TRUE)\n-\n-suppressPackageStartupMessages({\n-  library(getopt)\n-  library(tools)\n-})\n-\n-# column 1: the long flag name\n-# column 2: the short flag alias. A SINGLE character string\n-# column 3: argument mask\n-#           0: no argument\n-#           1: argument required\n-#           2: argument is optional\n-# column 4: date type to which the flag\'s argument shall be cast.\n-#           possible values: logical, integer, double, complex, character.\n-spec_list=list()\n-\n-##------- 1. input data ---------------------\n-spec_list$READS = c(\'reads\', \'r\', \'1\', \'character\')\n-spec_list$ECHO = c(\'echo\', \'e\', \'1\', \'character\')\n-\n-##--------2. output report and report site directory --------------\n-spec_list$FASTQC_SITE = c(\'fastqc_site\', \'o\', \'1\', \'character\')\n-spec_list$FASTQC_SITE_DIR = c(\'fastqc_site_dir\', \'d\', \'1\', \'character\')\n-\n-##--------3. 
Rmd templates sitting in the tool directory ----------\n-\n-    ## _site.yml and index.Rmd files\n-    spec_list$SITE_YML = c(\'site_yml\', \'s\', 1, \'character\')\n-    spec_list$INDEX_Rmd = c(\'index_rmd\', \'i\', 1, \'character\')\n-    \n-    ## other Rmd body template files\n-    spec_list$x01 = c(\'x01_evaluation_overview\', \'p\', \'1\', \'character\')\n-    spec_list$x02 = c(\'x02_fastqc_original_reports\', \'a\', \'1\', \'character\')\n-    spec_list$x1 = c(\'x1_per_base_quality_scores\', \'b\', \'1\', \'character\')\n-    spec_list$x2 = c(\'x2_per_base_N_content\', \'c\', \'1\', \'character\')\n-    spec_list$x3 = c(\'x3_per_sequence_quality_scores\', \'f\', \'1\', \'character\')\n-    spec_list$x4 = c(\'x4_per_sequence_GC_content\', \'g\', \'1\', \'character\')\n-    spec_list$x5 = c(\'x5_per_base_sequence_content\', \'h\', \'1\', \'character\')\n-\n-##------------------------------------------------------------------\n-\n-spec = t(as.data.frame(spec_list))\n-opt = getopt(spec)\n-# arguments are accessed by long flag name (the first column in the spec matrix)\n-#                        NOT by element name in the spec_list\n-# example: opt$help, opt$expression_file\n-##====== End of arguments handling ==========\n-\n-#------ Load libraries ---------\n+library(getopt)\n library(rmarkdown)\n+library(htmltools)\n library(plyr)\n+library(dplyr)\n library(stringr)\n-library(dplyr)\n library(highcharter)\n library(DT)\n library(reshape2)\n library(plotly)\n library(formattable)\n-library(htmltools)\n-\n+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)\n \n-#----- 1. create the report directory ------------------------\n-paste0(\'mkdir -p \', opt$fastqc_site_dir) %>%\n-  system()\n-\n-#----- 2. generate Rmd files with Rmd templates --------------\n-#   a. templates without placeholder variables:\n-#         copy templates from tool directory to the working directory.\n-#   b. 
templates with placeholder variables:\n-#         substitute variables with user input values and place them in the working directory.\n+##============ Sink warnings and errors to a file ==============\n+## use the sink() function to wrap all code within it.\n+##==============================================================\n+zz = file(\'warnings_and_errors.txt\')\n+sink(zz)\n+sink(zz, type = \'message\')\n+  ##---------below is the code for rendering .Rmd templates-----\n+  \n+  ##=============STEP 1: handle command line arguments==========\n+  ##\n+  ##============================================================\n+  # column 1: the long flag name\n+  # column 2: the short flag alias. A SINGLE character string\n+  # column 3: argument mask\n+  #           0: no argument\n+  #           1: argument required\n+  #           2: argument i'..b' {\n+      gsub(\'ECHO\', opt$echo, x)\n+    }) %>%\n+    (function(x) {\n+      gsub(\'REPORT_DIR\', opt$report_dir, x)\n+    }) %>%\n+    (function(x) {\n+      fileConn = file(\'x06_per_sequence_gc_content.Rmd\')\n+      writeLines(x, con=fileConn)\n+      close(fileConn)\n+    })\n+  \n+  # 07_per_base_n_content.Rmd\n+  readLines(opt$x07_per_base_n_content) %>%\n+    (function(x) {\n+      gsub(\'ECHO\', opt$echo, x)\n+    }) %>%\n+    (function(x) {\n+      gsub(\'REPORT_DIR\', opt$report_dir, x)\n+    }) %>%\n+    (function(x) {\n+      fileConn = file(\'x07_per_base_n_content.Rmd\')\n+      writeLines(x, con=fileConn)\n+      close(fileConn)\n+    })\n \n-    #----- 02_fastqc_original_reports.Rmd -------------------\n-    readLines(opt$x02_fastqc_original_reports) %>%\n-      (function(x) {\n-        gsub(\'ECHO\', opt$echo, x)\n-      }) %>%\n-      (function(x) {\n-        gsub(\'REPORT_OUTPUT_DIR\', opt$fastqc_site_dir, x)\n-      }) %>%\n-      (function(x) {\n-        fileConn = file(\'02_fastqc_original_reports.Rmd\')\n-        writeLines(x, con=fileConn)\n-        close(fileConn)\n-      })\n+  # 
08_sequence_length_distribution.Rmd\n+  readLines(opt$x08_sequence_length_distribution) %>%\n+    (function(x) {\n+      gsub(\'ECHO\', opt$echo, x)\n+    }) %>%\n+    (function(x) {\n+      gsub(\'REPORT_DIR\', opt$report_dir, x)\n+    }) %>%\n+    (function(x) {\n+      fileConn = file(\'x08_sequence_length_distribution.Rmd\')\n+      writeLines(x, con=fileConn)\n+      close(fileConn)\n+    })\n+  \n+  # 09_sequence_duplication_levels.Rmd\n+  readLines(opt$x09_sequence_duplication_levels) %>%\n+    (function(x) {\n+      gsub(\'ECHO\', opt$echo, x)\n+    }) %>%\n+    (function(x) {\n+      gsub(\'REPORT_DIR\', opt$report_dir, x)\n+    }) %>%\n+    (function(x) {\n+      fileConn = file(\'x09_sequence_duplication_levels.Rmd\')\n+      writeLines(x, con=fileConn)\n+      close(fileConn)\n+    })\n+  \n+  # 10_adapter_content.Rmd\n+  readLines(opt$x10_adapter_content) %>%\n+    (function(x) {\n+      gsub(\'ECHO\', opt$echo, x)\n+    }) %>%\n+    (function(x) {\n+      gsub(\'REPORT_DIR\', opt$report_dir, x)\n+    }) %>%\n+    (function(x) {\n+      fileConn = file(\'x10_adapter_content.Rmd\')\n+      writeLines(x, con=fileConn)\n+      close(fileConn)\n+    })\n+  \n+  # 11_kmer_content.Rmd\n+  readLines(opt$x11_kmer_content) %>%\n+    (function(x) {\n+      gsub(\'ECHO\', opt$echo, x)\n+    }) %>%\n+    (function(x) {\n+      gsub(\'REPORT_DIR\', opt$report_dir, x)\n+    }) %>%\n+    (function(x) {\n+      fileConn = file(\'x11_kmer_content.Rmd\')\n+      writeLines(x, con=fileConn)\n+      close(fileConn)\n+    })\n+  \n+  ##=============STEP 5: render all .Rmd templates=================\n+  ##\n+  ##===========================================================\n+  extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {\n+    f = readLines(fastqc_data)\n+    start_line = grep(module_name, f)\n+    end_module_lines = grep(\'END_MODULE\', f)\n+    end_line = end_module_lines[which(end_module_lines > start_line)[1]]\n+    
module_data = f[(start_line+1):(end_line-1)]\n+    writeLines(module_data, \'temp.txt\')\n+    read.csv(\'temp.txt\', sep = \'\\t\', header = header, comment.char = comment.char)\n+  }\n+  render_site()\n+  \n+  ##=============STEP 6: manipulate outputs====================\n+  ##\n+  ##===========================================================\n+  file.copy(\'my_site/index.html\', opt$report_html, recursive = TRUE)\n+  system(paste0(\'cp -r my_site/* \', opt$report_dir))\n \n \n-\n-#------ 3. render all Rmd files with render_site() --------\n-render_site()    \n-\n-\n-#-------4. manipulate outputs -----------------------------\n-#   a. copy index.html to the report output path\n-#   b. copy all files in \'my_site\' to the report output directory\n-file.copy(\'my_site/index.html\', opt$fastqc_site, recursive=TRUE)\n-paste0(\'cp -r my_site/* \', opt$fastqc_site_dir) %>%\n-  system()\n-\n-\n+  ##--------end of code rendering .Rmd templates----------------\n+sink()\n+##=========== End of sinking output=============================\n\\ No newline at end of file\n'
b
diff -r 600c39b11913 -r 507eec497730 index.Rmd
--- a/index.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ b/index.Rmd Tue Nov 07 16:52:24 2017 -0500
b
@@ -4,7 +4,7 @@
 ---
 
 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
+knitr::opts_chunk$set(echo = TRUE, error = TRUE)
 ```