Galaxy |

Changeset 11:507eec497730 (2017-11-07)

Previous changeset 10:600c39b11913 (2017-08-15) Next changeset 12:68ea2ebbf866 (2017-11-09)

Commit message:
update fastqc site

modified:
01_evaluation_overview.Rmd
_site.yml
fastqc_site.xml
fastqc_site_render.R
index.Rmd

added:
02_per_base_sequence_quality.Rmd
03_per_tile_sequence_quality.Rmd
04_per_sequence_quality_score.Rmd
05_per_base_sequence_content.Rmd
06_per_sequence_gc_content.Rmd
07_per_base_n_content.Rmd
08_sequence_length_distribution.Rmd
09_sequence_duplication_levels.Rmd
10_adapter_content.Rmd
11_kmer_content.Rmd

removed:
02_fastqc_original_reports.Rmd
1_per_base_quality_scores.Rmd
2_per_base_N_content.Rmd
3_per_sequence_quality_scores.Rmd
4_per_sequence_GC_content.Rmd
5_per_base_sequence_content.Rmd

diff -r 600c39b11913 -r 507eec497730 01_evaluation_overview.Rmd
--- a/01_evaluation_overview.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ b/01_evaluation_overview.Rmd Tue Nov 07 16:52:24 2017 -0500

[

b'@@ -1,123 +1,124 @@\n ---\n-title: "Evaluation Overview"\n-output: html_document\n+title: \'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)\'\n+output:\n+ html_document:\n+ number_sections: true\n+ toc: true\n+ theme: cosmo\n+ highlight: tango\n ---\n \n ```{r setup, include=FALSE, warning=FALSE, message=FALSE}\n-knitr::opts_chunk$set(echo = ECHO)\n-```\n-\n-```{bash \'copy data from datasets directory to working directory\', echo=FALSE}\n-# Copy uploaded data to the working directory\n-for f in $(echo READS | sed "s/,/ /g")\n-do\n- cp $f ./\n-done\n-```\n-\n-```{bash \'run fastqc\', echo=FALSE}\n-# run fastqc and place outputs into the report directory\n-for r in $(ls *.dat)\n-do\n- fastqc -o REPORT_OUTPUT_DIR $r > /dev/null 2>&1\n-done\n-```\n-\n-```{bash \'parse fastqc results\', echo=FALSE}\n-##==== copy fastqc generated zip files from report output directory to job work directory ==\n-cp -r REPORT_OUTPUT_DIR/*zip ./\n-\n-# create a file to store data file paths\n-echo "sample_id,file_path" > PWF_file_paths.txt # Pass, Warning, Fail\n-echo "sample_id,file_path" > PBQS_file_paths.txt # Per Base Quality Score\n-echo "sample_id,file_path" > PSQS_file_paths.txt # Per Sequence Quality Score\n-echo "sample_id,file_path" > PSGC_file_paths.txt # Per Sequence GC Content\n-echo "sample_id,file_path" > PBSC_file_paths.txt # Per Base Sequence Content\n-echo "sample_id,file_path" > PBNC_file_paths.txt # Per Base N Content\n-echo "sample_id,file_path" > SDL_file_paths.txt # Sequence Duplication Level\n-echo "sample_id,file_path" > SLD_file_paths.txt # Sequence Length Distribution\n-echo "sample_id,file_path" > KMC_file_paths.txt # Kmer Content\n-\n-for i in $(ls *.zip)\n-do\n- BASE=$(echo $i | sed \'s/\$.*\$\\.zip/\\1/g\')\n- echo $BASE\n- unzip ${BASE}.zip > /dev/null 2>&1\n- \n- ##====== pass,warning,fail (WSF) =============\n- awk \'/^>>/ {print}\' "$BASE"/fastqc_data.txt | grep -v \'END_MODULE\' | sed \'s/>>//\' > 
"$BASE"-PWF.txt\n- echo "${BASE},${BASE}-PWF.txt" >> PWF_file_paths.txt\n-\n- ##====== per base quality scores (PBQS) ======\n- awk \'/^>>Per base sequence quality/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PBQS.txt\n- echo "${BASE},${BASE}-PBQS.txt" >> PBQS_file_paths.txt\n-\n- ##====== per sequence quality scores (PSQS)\n- awk \'/^>>Per sequence quality scores/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PSQS.txt\n- echo "${BASE},${BASE}-PSQS.txt" >> PSQS_file_paths.txt\n-\n- ##====== Per sequence GC content (PSGC)\n- awk \'/^>>Per sequence GC content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PSGC.txt\n- echo "${BASE},${BASE}-PSGC.txt" >> PSGC_file_paths.txt\n- \n- ##====== Per Base Sequence Content (PBSC)\n- awk \'/^>>Per base sequence content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PBSC.txt\n- echo "${BASE},${BASE}-PBSC.txt" >> PBSC_file_paths.txt\n- \n- ##====== Per Base N Content (PBNC)\n- awk \'/^>>Per base N content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PBNC.txt\n- echo "${BASE},${BASE}-PBNC.txt" >> PBNC_file_paths.txt\n- \n- ##====== Sequence Duplication Level (SDL)\n- awk \'/^>>Sequence Duplication Levels/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-SDL.txt\n- echo "${BASE},${BASE}-SDL.txt" >> SDL_file_paths.txt\n- \n- ##====== Sequence Length Distribution (SLD)\n- awk \'/^>>Sequence Length Distribution/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-SLD.txt\n- echo "${BASE},${BASE}-SLD.txt" >> SLD_file_paths.txt\n- \n- ##====== Kmer Content ============\n- awk \'/^>>Kmer Content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-KMC.txt\n- echo "${BASE},${BASE}-KMC.txt" >> KMC_file_paths.txt\n- \n-done\n+knitr::opts_chunk$set(\n+ echo = ECHO,\n+ error = TRUE\n+)\n ```\n \n \n-## 
Evaluat'..b' stop("No pre-trimming reads provided!")\n+} else {\n+ ## run fastqc evaluation\n+ fastqc_command = paste0(\'fastqc \') %>% \n+ (function(x) {\n+ ifelse(\'CONTAMINANTS\' != \'None\', paste0(x, \'-c CONTAMINANTS \'), x)\n+ }) %>% \n+ (function(x) {\n+ ifelse(\'LIMITS\' != \'None\', paste0(x, \'-l LIMITS \'), x)\n+ }) %>% \n+ (function(x) {\n+ paste0(x, \'-o REPORT_DIR \')\n+ })\n+ fastqc_command_reads_1 = paste0(fastqc_command, \'READS_1 > /dev/null 2>&1\')\n+ system(fastqc_command_reads_1, intern = TRUE)\n+ \n+ # Original html report\n+ reads_1_base = tail(strsplit(\'READS_1\', \'/\')[[1]], 1)\n+ original_html = tags$a(href=paste0(reads_1_base, \'_fastqc.html\'), paste0(\'HTML report: \', opt$name_1))\n+ \n+ unzip(paste0(\'REPORT_DIR/\', reads_1_base, \'_fastqc.zip\'), exdir = \'REPORT_DIR\')\n+ reads_1_unzip = paste0(\'REPORT_DIR/\', reads_1_base, \'_fastqc/\')\n+ # fastqc_data.txt\n+ file.copy(paste0(reads_1_unzip, \'fastqc_data.txt\'), \'REPORT_DIR/reads_1_fastqc_data.txt\')\n+ fastqc_data = tags$a(href=\'reads_1_fastqc_data.txt\', paste0(\'fastqc_data.txt: \', opt$name_1))\n+ # summary.txt\n+ file.copy(paste0(reads_1_unzip, \'summary.txt\'), \'REPORT_DIR/reads_1_summary.txt\')\n+ summary_data = tags$a(href=\'reads_1_summary.txt\', paste0(\'summary.txt: \', opt$name_1))\n+ \n+ tags$ul(\n+ tags$li(original_html),\n+ tags$li(fastqc_data),\n+ tags$li(summary_data)\n+ )\n }\n ```\n \n \n+## Evaluation of reads after trimming\n+\n ```{r}\n-my_icon = c(\'ok\', \'remove\', \'star\')\n-names(my_icon) = c(\'pass\', \'fail\', \'warn\')\n-evaluate_list = list()\n-for (i in colnames(PWF_df)[-1]) {\n- evaluate_list[[i]] = formatter(\n- "span", \n- style = x ~ style("background-color" = ifelse(x ==\'pass\', \'#9CD027\', ifelse(x == \'fail\', \'#CC0000\', \'#FF4E00\')), \n- "color" = "white",\n- "width" = "50px",\n- "float" = "left",\n- "padding-right" = "5px")\n- )\n+if (\'READS_2\' == \'None\') {\n+ stop("No pre-trimming reads provided!")\n+} else {\n+ ## run 
fastqc evaluation\n+ fastqc_command = paste0(\'fastqc \') %>% \n+ (function(x) {\n+ ifelse(\'CONTAMINANTS\' != \'None\', paste0(x, \'-c CONTAMINANTS \'), x)\n+ }) %>% \n+ (function(x) {\n+ ifelse(\'LIMITS\' != \'None\', paste0(x, \'-l LIMITS \'), x)\n+ }) %>% \n+ (function(x) {\n+ paste0(x, \'-o REPORT_DIR \')\n+ })\n+ fastqc_command_reads_2 = paste0(fastqc_command, \'READS_2 > /dev/null 2>&1\')\n+ system(fastqc_command_reads_2, intern = TRUE)\n+ \n+ # Original html report\n+ reads_2_base = tail(strsplit(\'READS_2\', \'/\')[[1]], 1)\n+ original_html = tags$a(href=paste0(reads_2_base, \'_fastqc.html\'), paste0(\'HTML report: \', opt$name_2))\n+ \n+ unzip(paste0(\'REPORT_DIR/\', reads_2_base, \'_fastqc.zip\'), exdir = \'REPORT_DIR\')\n+ reads_2_unzip = paste0(\'REPORT_DIR/\', reads_2_base, \'_fastqc/\')\n+ # fastqc_data.txt\n+ file.copy(paste0(reads_2_unzip, \'fastqc_data.txt\'), \'REPORT_DIR/reads_2_fastqc_data.txt\')\n+ fastqc_data = tags$a(href=\'reads_2_fastqc_data.txt\', paste0(\'fastqc_data.txt: \', opt$name_2))\n+ # summary.txt\n+ file.copy(paste0(reads_2_unzip, \'summary.txt\'), \'REPORT_DIR/reads_2_summary.txt\')\n+ summary_data = tags$a(href=\'reads_2_summary.txt\', paste0(\'summary.txt: \', opt$name_2))\n+ \n+ tags$ul(\n+ tags$li(original_html),\n+ tags$li(fastqc_data),\n+ tags$li(summary_data)\n+ )\n }\n+```\n \n-formattable(PWF_df, evaluate_list)\n+\n+\n+# Fastqc output visualization\n+\n+## Overview\n+\n+```{r}\n+reads_1_summary = read.csv(\'REPORT_DIR/reads_1_summary.txt\', header = FALSE, sep = \'\\t\')[, 2:1]\n+reads_2_summary = read.csv(\'REPORT_DIR/reads_2_summary.txt\', header = FALSE, sep = \'\\t\')[, 1]\n+combined_summary = cbind(reads_1_summary, reads_2_summary)\n+names(combined_summary) = c(\'MODULE\', paste0(opt$name_1, \'(before)\'), paste0(opt$name_2, \'(after)\'))\n+combined_summary[combined_summary == \'FAIL\'] = \'FAIL (X)\'\n+combined_summary[combined_summary == \'WARN\'] = \'WARN (!)\'\n+knitr::kable(combined_summary)\n+```\n+\n+# 
Session Info\n+\n+```{r \'session info\'}\n+sessionInfo()\n ```\n\\ No newline at end of file\n'

diff -r 600c39b11913 -r 507eec497730 02_fastqc_original_reports.Rmd
--- a/02_fastqc_original_reports.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,20 +0,0 @@
----
-title: "FastQC original reports"
-output: html_document
----
-
-```{r 'FastQC original reports', include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-
-Below are links to ***Fastqc*** original html reports.
-
-```{r 'html report links'}
-html_report_list = list()
-html_files = list.files('REPORT_OUTPUT_DIR', pattern = '.*html')
-for (i in html_files) {
- html_report_list[[i]] = tags$li(tags$a(href=i, i))
-}
-tags$ul(html_report_list)
-```
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 02_per_base_sequence_quality.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/02_per_base_sequence_quality.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -0,0 +1,47 @@
+---
+title: 'Per base sequence quality'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,  # value is not defined in this file; presumably a placeholder substituted before rendering (TODO confirm)
+  error = TRUE  # knitr chunk option: keep going on chunk errors and show them in the output
+)
+```
+
+### Per base sequence quality
+
+```{r 'per base sequence quality', fig.width=10}
+## Reads 1 (before trimming): keep only the 'Mean' column of the module.
+pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality')
+## seq_along() yields an empty index for an empty table, where 1:length() would give c(1, 0).
+pbsq_1$id = seq_along(pbsq_1$X.Base)
+
+melt_pbsq_1 = filter(melt(pbsq_1, id=c('X.Base', 'id')), variable == 'Mean')
+melt_pbsq_1$trim = 'before'
+
+## Reads 2 (after trimming): same extraction.
+pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality')
+pbsq_2$id = seq_along(pbsq_2$X.Base)
+
+melt_pbsq_2 = filter(melt(pbsq_2, id=c('X.Base', 'id')), variable == 'Mean')
+melt_pbsq_2$trim = 'after'
+
+## Stack both sets; the level order places 'before' in the left facet.
+comb_pbsq = rbind(melt_pbsq_1, melt_pbsq_2)
+comb_pbsq$trim = factor(comb_pbsq$trim, levels = c('before', 'after'))
+
+## NOTE(review): tick labels come from reads 2 only; confirm both inputs share the same base positions.
+p = ggplot(data = comb_pbsq) +
+  geom_line(mapping = aes(x = id, y = value, group = variable, color = variable)) +
+  scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
+  facet_grid(. ~ trim) +
+  ylim(0, max(comb_pbsq$value) + 5) +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+
+```
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 03_per_tile_sequence_quality.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/03_per_tile_sequence_quality.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -0,0 +1,48 @@
+---
+title: 'Per Tile Sequence Quality'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,  # value is not defined in this file; presumably a placeholder substituted before rendering (TODO confirm)
+  error = TRUE  # knitr chunk option: keep going on chunk errors and show them in the output
+)
+```
+
+### Per tile sequence quality
+
+```{r 'per tile sequence quality', fig.width=10}
+## Check that the 'Per tile sequence quality' module exists in both FastQC data files;
+## the module is only extracted and plotted when both inputs provide it.
+check_ptsq = grep('Per tile sequence quality', readLines('REPORT_DIR/reads_1_fastqc_data.txt'))
+check_ptsq_2 = grep('Per tile sequence quality', readLines('REPORT_DIR/reads_2_fastqc_data.txt'))
+if (length(check_ptsq) > 0 && length(check_ptsq_2) > 0) {
+  ## reads 1 (before trimming)
+  ptsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per tile sequence quality')
+  ptsq_1$trim = 'before'
+
+  ## reads 2 (after trimming)
+  ptsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per tile sequence quality')
+  ptsq_2$trim = 'after'
+
+  ## stack both sets; the level order places 'before' in the left facet
+  comb_ptsq = rbind(ptsq_1, ptsq_2)
+  comb_ptsq$trim = factor(comb_ptsq$trim, levels = c('before', 'after'))
+  ## keep base positions in order of first appearance instead of alphabetical order
+  comb_ptsq$Base = factor(comb_ptsq$Base, levels = unique(comb_ptsq$Base))
+
+  # convert integers to characters
+  comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)
+
+  p = ggplot(data = comb_ptsq, aes(x = Base, y = Tile, fill = Mean)) +
+    geom_raster() +
+    facet_grid(. ~ trim) +
+    xlab('Position in read (bp)') +
+    ylab('') +
+    theme(axis.text.x = element_text(angle=45))
+  ggplotly(p)
+} else {
+  print('No "per tile sequence quality" data')
+}
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 04_per_sequence_quality_score.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/04_per_sequence_quality_score.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -0,0 +1,39 @@
+---
+title: 'Per sequence quality score'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,  # value is not defined in this file; presumably a placeholder substituted before rendering (TODO confirm)
+  error = TRUE  # knitr chunk option: keep going on chunk errors and show them in the output
+)
+```
+
+### Per sequence quality score
+
+```{r 'Per sequence quality score', fig.width=10}
+## Reads 1 (before trimming).
+psqs_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per sequence quality scores')
+psqs_1$trim = 'before'
+
+## Reads 2 (after trimming).
+psqs_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per sequence quality scores')
+psqs_2$trim = 'after'
+
+## Stack both sets; the level order places 'before' in the left facet.
+comb_psqs = rbind(psqs_1, psqs_2)
+comb_psqs$trim = factor(comb_psqs$trim, levels = c('before', 'after'))
+
+## One line per facet; the x range is pinned to the observed quality values.
+p = ggplot(data = comb_psqs, aes(x = X.Quality, y = Count)) +
+  geom_line(color = 'red') +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality)) +
+  xlab('Mean Sequence Quality (Phred Score)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 05_per_base_sequence_content.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/05_per_base_sequence_content.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -0,0 +1,47 @@
+---
+title: 'Per base sequence content'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,  # value is not defined in this file; presumably a placeholder substituted before rendering (TODO confirm)
+  error = TRUE  # knitr chunk option: keep going on chunk errors and show them in the output
+)
+```
+
+### Per base sequence content
+
+```{r 'Per base sequence content', fig.width=10}
+## Reads 1 (before trimming): long format, one row per position and column.
+pbsc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence content')
+## seq_along() yields an empty index for an empty table, where 1:length() would give c(1, 0).
+pbsc_1$id = seq_along(pbsc_1$X.Base)
+
+melt_pbsc_1 = melt(pbsc_1, id=c('X.Base', 'id'))
+melt_pbsc_1$trim = 'before'
+
+## Reads 2 (after trimming): same extraction.
+pbsc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence content')
+pbsc_2$id = seq_along(pbsc_2$X.Base)
+
+melt_pbsc_2 = melt(pbsc_2, id=c('X.Base', 'id'))
+melt_pbsc_2$trim = 'after'
+
+## Stack both sets; the level order places 'before' in the left facet.
+comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2)
+comb_pbsc$trim = factor(comb_pbsc$trim, levels = c('before', 'after'))
+
+## One coloured line per melted column, on a fixed 0-100 y axis.
+p = ggplot(data = comb_pbsc, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_pbsc$id), max(comb_pbsc$id)) +
+  ylim(0, 100) +
+  xlab('Position in read (bp)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 06_per_sequence_gc_content.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/06_per_sequence_gc_content.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -0,0 +1,38 @@
+---
+title: 'Per sequence GC content'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,  # value is not defined in this file; presumably a placeholder substituted before rendering (TODO confirm)
+  error = TRUE  # knitr chunk option: keep going on chunk errors and show them in the output
+)
+```
+
+### Per sequence GC content
+
+```{r 'Per sequence GC content', fig.width=10}
+## Reads 1 (before trimming).
+psGCc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per sequence GC content')
+psGCc_1$trim = 'before'
+
+## Reads 2 (after trimming).
+psGCc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per sequence GC content')
+psGCc_2$trim = 'after'
+
+## Stack both sets; the level order places 'before' in the left facet.
+comb_psGCc = rbind(psGCc_1, psGCc_2)
+comb_psGCc$trim = factor(comb_psGCc$trim, levels = c('before', 'after'))
+
+## The x axis is the GC content column, so it is labelled as such
+## (the previous label was copied from the quality-score page).
+p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) +
+  geom_line(color = 'red') +
+  facet_grid(. ~ trim) +
+  xlab('Mean GC content (%)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 07_per_base_n_content.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/07_per_base_n_content.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -0,0 +1,43 @@
+---
+title: 'Per base N content'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,  # value is not defined in this file; presumably a placeholder substituted before rendering (TODO confirm)
+  error = TRUE  # knitr chunk option: keep going on chunk errors and show them in the output
+)
+```
+
+### Per base N content
+
+```{r 'Per base N content', fig.width=10}
+## Reads 1 (before trimming).
+pbNc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base N content')
+## seq_along() yields an empty index for an empty table, where 1:length() would give c(1, 0).
+pbNc_1$id = seq_along(pbNc_1$X.Base)
+pbNc_1$trim = 'before'
+
+## Reads 2 (after trimming).
+pbNc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base N content')
+pbNc_2$id = seq_along(pbNc_2$X.Base)
+pbNc_2$trim = 'after'
+
+## Stack both sets; the level order places 'before' in the left facet.
+comb_pbNc = rbind(pbNc_1, pbNc_2)
+comb_pbNc$trim = factor(comb_pbNc$trim, levels = c('before', 'after'))
+
+## x is the row index labelled with the base position; y is the N-Count column,
+## so the 'N-Count' label belongs on the y axis.
+## NOTE(review): ylim(0, 1) drops any N-Count value above 1; confirm the value range.
+p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) +
+  geom_line(color = 'red') +
+  scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) +
+  facet_grid(. ~ trim) +
+  ylim(0, 1) +
+  xlab('Position in read (bp)') +
+  ylab('N-Count') +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 08_sequence_length_distribution.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/08_sequence_length_distribution.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -0,0 +1,42 @@
+---
+title: 'Sequence Length Distribution'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,  # value is not defined in this file; presumably a placeholder substituted before rendering (TODO confirm)
+  error = TRUE  # knitr chunk option: keep going on chunk errors and show them in the output
+)
+```
+
+### Sequence Length Distribution
+
+```{r 'Sequence Length Distribution', fig.width=10}
+## Reads 1 (before trimming).
+sld_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Sequence Length Distribution')
+## seq_along() yields an empty index for an empty table, where 1:length() would give c(1, 0).
+sld_1$id = seq_along(sld_1$X.Length)
+sld_1$trim = 'before'
+
+## Reads 2 (after trimming).
+sld_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Sequence Length Distribution')
+sld_2$id = seq_along(sld_2$X.Length)
+sld_2$trim = 'after'
+
+## Stack both sets; the level order places 'before' in the left facet.
+comb_sld = rbind(sld_1, sld_2)
+comb_sld$trim = factor(comb_sld$trim, levels = c('before', 'after'))
+
+## NOTE(review): tick labels come from reads 2 only; confirm both inputs share the same length bins.
+p = ggplot(data = comb_sld, aes(x = id, y = Count)) +
+  geom_line(color = 'red') +
+  scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) +
+  facet_grid(. ~ trim) +
+  xlab('Sequence Length (bp)') +
+  ylab('') +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 09_sequence_duplication_levels.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/09_sequence_duplication_levels.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -0,0 +1,49 @@
+---
+title: 'Sequence Duplication Levels'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,  # value is not defined in this file; presumably a placeholder substituted before rendering (TODO confirm)
+  error = TRUE  # knitr chunk option: keep going on chunk errors and show them in the output
+)
+```
+
+### Sequence Duplication Levels
+
+```{r 'Sequence Duplication Levels', fig.width=10}
+## Reads 1 (before trimming): read without a header row and name the three columns explicitly.
+sdl_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
+names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
+## seq_along() yields an empty index for an empty table, where 1:length() would give c(1, 0).
+sdl_1$id = seq_along(sdl_1$Duplication_Level)
+
+melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id'))
+melt_sdl_1$trim = 'before'
+
+## Reads 2 (after trimming): same extraction.
+sdl_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
+names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
+sdl_2$id = seq_along(sdl_2$Duplication_Level)
+
+melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id'))
+melt_sdl_2$trim = 'after'
+
+## Stack both sets; the level order places 'before' in the left facet.
+comb_sdl = rbind(melt_sdl_1, melt_sdl_2)
+comb_sdl$trim = factor(comb_sdl$trim, levels = c('before', 'after'))
+
+## One coloured line per percentage column; ticks are labelled with the duplication level.
+p = ggplot(data = comb_sdl, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) +
+  facet_grid(. ~ trim) +
+  xlab('Sequence Duplication Level') +
+  ylab('') +
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 10_adapter_content.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/10_adapter_content.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -0,0 +1,46 @@
+---
+title: 'Adapter Content'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,  # value is not defined in this file; presumably a placeholder substituted before rendering (TODO confirm)
+  error = TRUE  # knitr chunk option: keep going on chunk errors and show them in the output
+)
+```
+
+### Adapter Content
+
+```{r 'Adapter Content', fig.width=10}
+## Reads 1 (before trimming): long format, one row per position and column.
+ac_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Adapter Content')
+## seq_along() yields an empty index for an empty table, where 1:length() would give c(1, 0).
+ac_1$id = seq_along(ac_1$X.Position)
+
+melt_ac_1 = melt(ac_1, id=c('X.Position', 'id'))
+melt_ac_1$trim = 'before'
+
+## Reads 2 (after trimming): same extraction.
+ac_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Adapter Content')
+ac_2$id = seq_along(ac_2$X.Position)
+
+melt_ac_2 = melt(ac_2, id=c('X.Position', 'id'))
+melt_ac_2$trim = 'after'
+
+## Stack both sets; the level order places 'before' in the left facet.
+comb_ac = rbind(melt_ac_1, melt_ac_2)
+comb_ac$trim = factor(comb_ac$trim, levels = c('before', 'after'))
+
+## NOTE(review): ylim(0, 1) drops any value above 1; confirm the range of the adapter columns.
+p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_ac$id), max(comb_ac$id)) +
+  ylim(0, 1) +
+  xlab('Position in read (bp)') +
+  ylab('')
+ggplotly(p)
+```
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 11_kmer_content.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/11_kmer_content.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -0,0 +1,31 @@
+---
+title: 'Kmer Content'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO,  # value is not defined in this file; presumably a placeholder substituted before rendering (TODO confirm)
+  error = TRUE  # knitr chunk option: keep going on chunk errors and show them in the output
+)
+```
+
+### Kmer Content {.tabset}
+
+#### Before
+
+```{r 'Kmer Content (before)', fig.width=10}
+# Show the 'Kmer Content' module of reads 1 (before trimming) as a table.
+kmer_before = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Kmer Content')
+knitr::kable(kmer_before)
+```
+
+#### After
+```{r 'Kmer Content (after)', fig.width=10}
+# Show the 'Kmer Content' module of reads 2 (after trimming) as a table.
+kmer_after = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Kmer Content')
+knitr::kable(kmer_after)
+```
\ No newline at end of file

diff -r 600c39b11913 -r 507eec497730 1_per_base_quality_scores.Rmd
--- a/1_per_base_quality_scores.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,62 +0,0 @@
----
-title: "Per Base Quality Scores"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-
-## Per Base Quality Scores
-
-```{r}
-PBQS_df = data.frame()
-PBQS_file_paths = read.csv('PBQS_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PBQS_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PBQS_file_paths[i,2])
-  file_path = PBQS_file_paths[i,2]
-  pbqs_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
-    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
-           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
-  (function (df) {
-    df1 = select(df, -Base2)
-    df2 = select(df, -Base1) %>% filter(Base2 != '')
-    colnames(df1) = c(colnames(df1)[1:7], 'Base')
-    colnames(df2) = c(colnames(df2)[1:7], 'Base')
-    res = rbind(df1, df2) %>% arrange(Base)
-    return(res)
-  })
-  pbqs_df$sample_id = rep(PBQS_file_paths[i,1], nrow(pbqs_df))
-  PBQS_df = rbind(PBQS_df, pbqs_df)
-}
-```
-
-
-```{r}
-# datatable(PBQS_df)
-max_phred = max(PBQS_df$Mean) + 10
-hchart(PBQS_df, "line", hcaes(x = Base, y = Mean, group = sample_id)) %>%
-  hc_title(
-    text = "Per Base Quality Score"
-  ) %>%
-  hc_yAxis(
-    title = list(text = "Mean Base Quality Score"),
-    min = 0,
-    max = max_phred,
-    plotLines = list(
-      list(label = list(text = "Phred Score = 27"),
-           width = 2,
-           dashStyle = "dash",
-           color = "green",
-           value = 27),
-      list(label = list(text = "Phred Score = 20"),
-           width = 2,
-           color = "red",
-           value = 20)
-    )
-  ) %>%
-  hc_exporting(enabled = TRUE)
-```

diff -r 600c39b11913 -r 507eec497730 2_per_base_N_content.Rmd
--- a/2_per_base_N_content.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,58 +0,0 @@
----
-title: "Per Base N Content"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Base N Content
-
-```{r}
-PBNC_df = data.frame()
-PBNC_file_paths = read.csv('PBNC_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PBNC_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PBNC_file_paths[i,2])
-  file_path = PBNC_file_paths[i,2]
-  pbnc_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
-    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
-           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
-  (function (df) {
-    df1 = select(df, -Base2)
-    df2 = select(df, -Base1) %>% filter(Base2 != '')
-    colnames(df1) = c(colnames(df1)[1:2], 'Base')
-    colnames(df2) = c(colnames(df2)[1:2], 'Base')
-    res = rbind(df1, df2) %>% arrange(Base)
-    return(res)
-  })
-  pbnc_df$sample_id = rep(PBNC_file_paths[i,1], nrow(pbnc_df))
-  PBNC_df = rbind(PBNC_df, pbnc_df)
-}
-```
-
-
-```{r}
-PBNC_df$N.Count = PBNC_df$N.Count * 100
-max_phred = max(PBNC_df$N.Count) + 5
-hchart(PBNC_df, "line", hcaes(x = as.character(Base), y = N.Count, group = sample_id)) %>%
-  hc_title(
-    text = "Per Base N Content"
-  ) %>%
-  hc_xAxis(
-    title = list(text = "Base Position")
-  ) %>%
-  hc_yAxis(
-    title = list(text = "N %"),
-    plotLines = list(
-      list(label = list(text = "N = 5%"),
-           width = 2,
-           dashStyle = "dash",
-           color = "red",
-           value = 5)
-    )
-  ) %>%
-  hc_exporting(enabled = TRUE)
-```

diff -r 600c39b11913 -r 507eec497730 3_per_sequence_quality_scores.Rmd
--- a/3_per_sequence_quality_scores.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,50 +0,0 @@
----
-title: "Per Sequence Quality Scores"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Sequence Quality Scores
-
-```{r}
-PSQS_df = data.frame()
-PSQS_file_paths = read.csv('PSQS_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PSQS_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PSQS_file_paths[i,2])
-  file_path = PSQS_file_paths[i,2]
-  psqs_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE)
-  psqs_df$sample_id = rep(PSQS_file_paths[i,1], nrow(psqs_df))
-  PSQS_df = rbind(PSQS_df, psqs_df)
-}
-```
-
-
-```{r}
-max_phred = max(PSQS_df$X.Quality) + 5
-hchart(PSQS_df, "line", hcaes(x = X.Quality, y = Count, group = sample_id)) %>%
-  hc_title(
-    text = "Per Sequence Quality Score"
-  ) %>%
-  hc_xAxis(
-    title = list(text = "Mean Sequence Quality Score"),
-    min = 0,
-    max = max_phred,
-    plotLines = list(
-      list(label = list(text = "Phred Score = 27"),
-           width = 2,
-           dashStyle = "dash",
-           color = "green",
-           value = 27),
-      list(label = list(text = "Phred Score = 20"),
-           width = 2,
-           color = "red",
-           value = 20)
-    )
-  ) %>%
-  hc_exporting(enabled = TRUE)
-```

diff -r 600c39b11913 -r 507eec497730 4_per_sequence_GC_content.Rmd
--- a/4_per_sequence_GC_content.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,38 +0,0 @@
----
-title: "Per Sequence GC Content"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Sequence GC Content
-
-
-```{r}
-PSGC_df = data.frame()
-PSGC_file_paths = read.csv('PSGC_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PSGC_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PSGC_file_paths[i,2])
-  file_path = PSGC_file_paths[i,2]
-  psgc_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE)
-  psgc_df$sample_id = rep(PSGC_file_paths[i,1], nrow(psgc_df))
-  PSGC_df = rbind(PSGC_df, psgc_df)
-}
-```
-
-
-```{r}
-max_phred = max(PSGC_df$Count) + 5
-hchart(PSGC_df, "line", hcaes(x = X.GC.Content, y = Count, group = sample_id)) %>%
-  hc_title(
-    text = "Per Sequence GC Content"
-  ) %>%
-  hc_xAxis(
-    title = list(text = "% GC")
-  ) %>%
-  hc_exporting(enabled = TRUE)
-```

diff -r 600c39b11913 -r 507eec497730 5_per_base_sequence_content.Rmd
--- a/5_per_base_sequence_content.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,45 +0,0 @@
----
-title: "Per Base Sequence Content"
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = ECHO)
-```
-
-## Per Base Sequence Content
-
-```{r}
-PBSC_df = data.frame()
-PBSC_file_paths = read.csv('PBSC_file_paths.txt',
-                           header = TRUE, stringsAsFactors = FALSE)
-for(i in 1:nrow(PBSC_file_paths)) {
-  # file_path = paste0('REPORT_OUTPUT_DIR/', PBSC_file_paths[i,2])
-  file_path = PBSC_file_paths[i,2]
-  pbsc_df = read.csv(file_path,
-                     sep='\t', header=TRUE, stringsAsFactors = FALSE) %>%
-    mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]),
-           Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>%
-  (function (df) {
-    df1 = select(df, -Base2)
-    df2 = select(df, -Base1) %>% filter(Base2 != '')
-    colnames(df1) = c(colnames(df1)[1:5], 'Base')
-    colnames(df2) = c(colnames(df2)[1:5], 'Base')
-    res = rbind(df1, df2) %>% arrange(Base)
-    return(res)
-  })
-  pbsc_df$sample_id = rep(PBSC_file_paths[i,1], nrow(pbsc_df))
-  PBSC_df = rbind(PBSC_df, pbsc_df)
-}
-```
-
-
-```{r out.width="100%"}
-PBSC_df_2 = select(PBSC_df, -X.Base) %>%
-  melt(id = c('Base', 'sample_id'), value.name = 'base_percentage')
-p = ggplot(data = PBSC_df_2, aes(x = Base, y = base_percentage, group = variable, color = variable)) +
-  geom_line() +
-  facet_wrap(~ sample_id)
-ggplotly(p)
-```
-

diff -r 600c39b11913 -r 507eec497730 _site.yml
--- a/_site.yml Tue Aug 15 15:50:21 2017 -0400
+++ b/_site.yml Tue Nov 07 16:52:24 2017 -0500

@@ -8,21 +8,29 @@
           icon: fa-home
           href: index.html
         - text: "Evaluation Overview"
-          href: 01_evaluation_overview.html
-        - text: "Evaluation Items"
+          href: x01_evaluation_overview.html
+        - text: "Evaluation by data module"
           menu:
-            - text: "Per Base Quality Scores"
-              href: 1_per_base_quality_scores.html
+            - text: "Per Base Sequence Quality"
+              href: x02_per_base_sequence_quality.html
+            - text: "Per Tile Sequence Quality"
+              href: x03_per_tile_sequence_quality.html
+            - text: "Per Sequence Quality Score"
+              href: x04_per_sequence_quality_score.html
+            - text: "Per Base Sequence Content"
+              href: x05_per_base_sequence_content.html
+            - text: "Per Sequence GC Content"
+              href: x06_per_sequence_gc_content.html
             - text: "Per Base N Content"
-              href: 2_per_base_N_content.html
-            - text: "Per Sequence Quality Scores"
-              href: 3_per_sequence_quality_scores.html
-            - text: "Per Sequence GC Content"
-              href: 4_per_sequence_GC_content.html
-            - text: "Per Base Sequence Content"
-              href: 5_per_base_sequence_content.html
-        - text: "Original FastQC Reports"
-          href: 02_fastqc_original_reports.html
+              href: x07_per_base_n_content.html
+            - text: "Sequence Length Distribution"
+              href: x08_sequence_length_distribution.html
+            - text: "Sequence Duplication Levels"
+              href: x09_sequence_duplication_levels.html
+            - text: "Adapter Content"
+              href: x10_adapter_content.html
+            - text: "Kmer Content"
+              href: x11_kmer_content.html
output:
   html_document:
     theme: cosmo

diff -r 600c39b11913 -r 507eec497730 fastqc_site.xml
--- a/fastqc_site.xml Tue Aug 15 15:50:21 2017 -0400
+++ b/fastqc_site.xml Tue Nov 07 16:52:24 2017 -0500

[

b'@@ -1,9 +1,9 @@\n-<tool id="fastqc_site" name="Fastqc Site" version="1.0.0">\n+<tool id="fastqc_site" name="Fastqc Site" version="2.0.0">\n <requirements>\n <requirement type="package" version="1.15.0.6-0">pandoc</requirement>\n <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>\n <requirement type="package" version="1.20.0">r-getopt</requirement>\n- <requirement type="package" version="1.2">r-rmarkdown</requirement>\n+ <requirement type="package" version="1.3">r-rmarkdown</requirement>\n <requirement type="package" version="1.8.4">r-plyr</requirement>\n <requirement type="package" version="1.1.0">r-stringr</requirement>\n <requirement type="package" version="0.5.0">r-highcharter</requirement>\n@@ -14,22 +14,12 @@\n <requirement type="package" version="0.3.5">r-htmltools</requirement>\n <requirement type="package" version="0.11.5">fastqc</requirement>\n </requirements>\n- <description>\n- Implements FastQC analysis and display results in R Markdown website.\n- </description>\n <stdio>\n- <regex match="Execution halted"\n- source="both"\n- level="fatal"\n- description="Execution halted." />\n- <regex match="Error in"\n- source="both"\n- level="fatal"\n- description="An undefined error occured, please check your intput carefully and contact your administrator." />\n- <regex match="Fatal error"\n- source="both"\n- level="fatal"\n- description="An undefined error occured, please check your intput carefully and contact your administrator." />\n+ \n+ <regex match="XXX"\n+ source="stderr"\n+ level="warning"\n+ description="Check the warnings_and_errors.txt file for more details."/>\n </stdio>\n <command>\n <![CDATA[\n@@ -37,36 +27,65 @@\n Rscript \'${__tool_directory__}/fastqc_site_render.R\'\n \n ## 1. input data\n- -r $reads\n -e $echo\n+ -r $reads_1\n+ -n \'$reads_1.name\'\n+ -R $reads_2\n+ -N \'$reads_2.name\'\n+ -c $contaminants\n+ -l $limits\n \n ## 2. 
output report and report site directory\n-\t\t -o $fastqc_site\n-\t\t -d $fastqc_site.files_path\n+\t\t -o $report\n+\t\t -d $report.files_path\n+\t\t -s $sink_message\n \n \t\t ## 3. Rmd templates sitting in the tool directory\n \n-\t\t ## _site.yml and index.Rmd template files\n- -s \'${__tool_directory__}/_site.yml\'\n- -i \'${__tool_directory__}/index.Rmd\'\n+\t\t ## _site.yml and index.Rmd template files\n+ -S \'${__tool_directory__}/_site.yml\'\n+ -I \'${__tool_directory__}/index.Rmd\'\n \n- ## other Rmd body template files\n-\t\t -p \'${__tool_directory__}/01_evaluation_overview.Rmd\'\n-\t\t -a \'${__tool_directory__}/02_fastqc_original_reports.Rmd\'\n-\t\t -b \'${__tool_directory__}/1_per_base_quality_scores.Rmd\'\n-\t\t -c \'${__tool_directory__}/2_per_base_N_content.Rmd\'\n-\t\t -f \'${__tool_directory__}/3_per_sequence_quality_scores.Rmd\'\n-\t\t -g \'${__tool_directory__}/4_per_sequence_GC_content.Rmd\'\n-\t\t -h \'${__tool_directory__}/5_per_base_sequence_content.Rmd\'\n+ ## other Rmd body template files\n+\t\t -A \'${__tool_directory__}/01_evaluation_overview.Rmd\'\n+\t\t -B \'${__tool_directory__}/02_per_base_sequence_quality.Rmd\'\n+\t\t -C \'${__tool_directory__}/03_per_tile_sequence_quality.Rmd\'\n+\t\t -D \'${__tool_directory__}/04_per_sequence_quality_score.Rmd\'\n+\t\t -E \'${__tool_directory__}/05_per_base_sequence_content.Rmd\'\n+\t\t -F \'${__tool_directory__}/06_per_sequence_gc_content.Rmd\'\n+\t\t -G \'${__tool_directory__}/07_per_base_n_content.Rmd\'\n'..b't="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_2" type="data"\n+ label="Short reads after trimming"\n+ help="Short reads data from history. This could be reads after trimming."/>\n+ <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"\n+ help="Specifies a non-default file which contains the list of adapter sequences which will be explicitly\n+ searched against the library. 
The file must contain sets of named adapters\n+ in the form name[tab]sequence. Lines prefixed with a hash will be ignored."/>\n+ <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file"\n+ help="Specifies a non-default file which contains a set of criteria\n+ which will be used to determine the warn/error limits for the\n+ various modules. This file can also be used to selectively\n+ remove some modules from the output all together. The format\n+ needs to mirror the default limits.txt file found in the\n+ Configuration folder."/>\n+ <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"\n+ label="Display analysis code in report?"/>\n </inputs>\n <outputs>\n- <data format="html" name="fastqc_site" label="fastqc site" />\n+ <data format="html" name="report" label="fastqc site"/>\n+ <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>\n </outputs>\n <citations>\n <citation type="bibtex">\n@@ -79,7 +98,8 @@\n <citation type="bibtex">\n @article{allaire2016rmarkdown,\n title={rmarkdown: Dynamic Documents for R, 2016},\n- author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},\n+ author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff\n+ and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},\n journal={R package version 0.9},\n volume={6},\n year={2016}\n@@ -97,31 +117,14 @@\n <citation type="bibtex">\n @misc{plotly2017,\n title = {plotly: Create Interactive Web Graphics via \'plotly.js\'},\n- author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy},\n+ author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and\n+ Marianne Corvellec and Pedro Despouy},\n year 
= {2017},\n note = {R package version 4.6.0},\n url = {https://CRAN.R-project.org/package=plotly},\n }\n </citation>\n <citation type="bibtex">\n- @misc{highcharter2017,\n- title = {highcharter: A Wrapper for the \'Highcharts\' Library},\n- author = {Joshua Kunst},\n- year = {2017},\n- note = {R package version 0.5.0},\n- url = {https://CRAN.R-project.org/package=highcharter},\n- }\n- </citation>\n- <citation type="bibtex">\n- @misc{formattable2016,\n- title = {formattable: Create \'Formattable\' Data Structures},\n- author = {Kun Ren and Kenton Russell},\n- year = {2016},\n- note = {R package version 0.2.0.1},\n- url = {https://CRAN.R-project.org/package=formattable},\n- }\n- </citation>\n- <citation>\n @article{ewels2016multiqc,\n title={MultiQC: summarize analysis results for multiple tools and samples in a single report},\n author={Ewels, Philip and Magnusson, M{\\aa}ns and Lundin, Sverker and K{\\"a}ller, Max},\n'

diff -r 600c39b11913 -r 507eec497730 fastqc_site_render.R
--- a/fastqc_site_render.R Tue Aug 15 15:50:21 2017 -0400
+++ b/fastqc_site_render.R Tue Nov 07 16:52:24 2017 -0500

[

b'@@ -1,195 +1,283 @@\n-##======= Handle arguments from command line ========\n-# setup R error handline to go to stderr\n-options(show.error.messages=FALSE,\n- error=function(){\n- cat(geterrmessage(), file=stderr())\n- quit("no", 1, F)\n- })\n-\n-# we need that to not crash galaxy with an UTF8 error on German LC settings.\n-loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n-\n-# suppress warning\n-options(warn = -1)\n-\n-options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)\n-args = commandArgs(trailingOnly=TRUE)\n-\n-suppressPackageStartupMessages({\n- library(getopt)\n- library(tools)\n-})\n-\n-# column 1: the long flag name\n-# column 2: the short flag alias. A SINGLE character string\n-# column 3: argument mask\n-# 0: no argument\n-# 1: argument required\n-# 2: argument is optional\n-# column 4: date type to which the flag\'s argument shall be cast.\n-# possible values: logical, integer, double, complex, character.\n-spec_list=list()\n-\n-##------- 1. input data ---------------------\n-spec_list$READS = c(\'reads\', \'r\', \'1\', \'character\')\n-spec_list$ECHO = c(\'echo\', \'e\', \'1\', \'character\')\n-\n-##--------2. output report and report site directory --------------\n-spec_list$FASTQC_SITE = c(\'fastqc_site\', \'o\', \'1\', \'character\')\n-spec_list$FASTQC_SITE_DIR = c(\'fastqc_site_dir\', \'d\', \'1\', \'character\')\n-\n-##--------3. 
Rmd templates sitting in the tool directory ----------\n-\n- ## _site.yml and index.Rmd files\n- spec_list$SITE_YML = c(\'site_yml\', \'s\', 1, \'character\')\n- spec_list$INDEX_Rmd = c(\'index_rmd\', \'i\', 1, \'character\')\n- \n- ## other Rmd body template files\n- spec_list$x01 = c(\'x01_evaluation_overview\', \'p\', \'1\', \'character\')\n- spec_list$x02 = c(\'x02_fastqc_original_reports\', \'a\', \'1\', \'character\')\n- spec_list$x1 = c(\'x1_per_base_quality_scores\', \'b\', \'1\', \'character\')\n- spec_list$x2 = c(\'x2_per_base_N_content\', \'c\', \'1\', \'character\')\n- spec_list$x3 = c(\'x3_per_sequence_quality_scores\', \'f\', \'1\', \'character\')\n- spec_list$x4 = c(\'x4_per_sequence_GC_content\', \'g\', \'1\', \'character\')\n- spec_list$x5 = c(\'x5_per_base_sequence_content\', \'h\', \'1\', \'character\')\n-\n-##------------------------------------------------------------------\n-\n-spec = t(as.data.frame(spec_list))\n-opt = getopt(spec)\n-# arguments are accessed by long flag name (the first column in the spec matrix)\n-# NOT by element name in the spec_list\n-# example: opt$help, opt$expression_file\n-##====== End of arguments handling ==========\n-\n-#------ Load libraries ---------\n+library(getopt)\n library(rmarkdown)\n+library(htmltools)\n library(plyr)\n+library(dplyr)\n library(stringr)\n-library(dplyr)\n library(highcharter)\n library(DT)\n library(reshape2)\n library(plotly)\n library(formattable)\n-library(htmltools)\n-\n+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)\n \n-#----- 1. create the report directory ------------------------\n-paste0(\'mkdir -p \', opt$fastqc_site_dir) %>%\n- system()\n-\n-#----- 2. generate Rmd files with Rmd templates --------------\n-# a. templates without placeholder variables:\n-# copy templates from tool directory to the working directory.\n-# b. 
templates with placeholder variables:\n-# substitute variables with user input values and place them in the working directory.\n+##============ Sink warnings and errors to a file ==============\n+## use the sink() function to wrap all code within it.\n+##==============================================================\n+zz = file(\'warnings_and_errors.txt\')\n+sink(zz)\n+sink(zz, type = \'message\')\n+ ##---------below is the code for rendering .Rmd templates-----\n+ \n+ ##=============STEP 1: handle command line arguments==========\n+ ##\n+ ##============================================================\n+ # column 1: the long flag name\n+ # column 2: the short flag alias. A SINGLE character string\n+ # column 3: argument mask\n+ # 0: no argument\n+ # 1: argument required\n+ # 2: argument i'..b' {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x06_per_sequence_gc_content.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n+ \n+ # 07_per_base_n_content.Rmd\n+ readLines(opt$x07_per_base_n_content) %>%\n+ (function(x) {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x07_per_base_n_content.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n \n- #----- 02_fastqc_original_reports.Rmd -------------------\n- readLines(opt$x02_fastqc_original_reports) %>%\n- (function(x) {\n- gsub(\'ECHO\', opt$echo, x)\n- }) %>%\n- (function(x) {\n- gsub(\'REPORT_OUTPUT_DIR\', opt$fastqc_site_dir, x)\n- }) %>%\n- (function(x) {\n- fileConn = file(\'02_fastqc_original_reports.Rmd\')\n- writeLines(x, con=fileConn)\n- close(fileConn)\n- })\n+ # 08_sequence_length_distribution.Rmd\n+ readLines(opt$x08_sequence_length_distribution) %>%\n+ (function(x) {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ 
(function(x) {\n+ fileConn = file(\'x08_sequence_length_distribution.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n+ \n+ # 09_sequence_duplication_levels.Rmd\n+ readLines(opt$x09_sequence_duplication_levels) %>%\n+ (function(x) {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x09_sequence_duplication_levels.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n+ \n+ # 10_adapter_content.Rmd\n+ readLines(opt$x10_adapter_content) %>%\n+ (function(x) {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x10_adapter_content.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n+ \n+ # 11_kmer_content.Rmd\n+ readLines(opt$x11_kmer_content) %>%\n+ (function(x) {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x11_kmer_content.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n+ \n+ ##=============STEP 5: render all .Rmd templates=================\n+ ##\n+ ##===========================================================\n+ extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {\n+ f = readLines(fastqc_data)\n+ start_line = grep(module_name, f)\n+ end_module_lines = grep(\'END_MODULE\', f)\n+ end_line = end_module_lines[which(end_module_lines > start_line)[1]]\n+ module_data = f[(start_line+1):(end_line-1)]\n+ writeLines(module_data, \'temp.txt\')\n+ read.csv(\'temp.txt\', sep = \'\\t\', header = header, comment.char = comment.char)\n+ }\n+ render_site()\n+ \n+ ##=============STEP 6: manipulate outputs====================\n+ ##\n+ ##===========================================================\n+ file.copy(\'my_site/index.html\', opt$report_html, recursive = TRUE)\n+ system(paste0(\'cp -r 
my_site/* \', opt$report_dir))\n \n \n-\n-#------ 3. render all Rmd files with render_site() --------\n-render_site() \n-\n-\n-#-------4. manipulate outputs -----------------------------\n-# a. copy index.html to the report output path\n-# b. copy all files in \'my_site\' to the report output directory\n-file.copy(\'my_site/index.html\', opt$fastqc_site, recursive=TRUE)\n-paste0(\'cp -r my_site/* \', opt$fastqc_site_dir) %>%\n- system()\n-\n-\n+ ##--------end of code rendering .Rmd templates----------------\n+sink()\n+##=========== End of sinking output=============================\n\\ No newline at end of file\n'

diff -r 600c39b11913 -r 507eec497730 index.Rmd
--- a/index.Rmd Tue Aug 15 15:50:21 2017 -0400
+++ b/index.Rmd Tue Nov 07 16:52:24 2017 -0500

@@ -4,7 +4,7 @@
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
+knitr::opts_chunk$set(echo = TRUE, error = TRUE)
```