Mercurial > repos > mingchen0919 > aurora_fastqc_site
changeset 0:b7c115edd970 draft
planemo upload
author | mingchen0919 |
---|---|
date | Tue, 27 Feb 2018 10:37:12 -0500 |
parents | |
children | 92e3de11b9f8 |
files | 01_evaluation_overview.Rmd 02_per_base_sequence_quality.Rmd 03_per_tile_sequence_quality.Rmd 04_per_sequence_quality_score.Rmd 05_per_base_sequence_content.Rmd 06_per_sequence_gc_content.Rmd 07_per_base_n_content.Rmd 08_sequence_length_distribution.Rmd 09_sequence_duplication_levels.Rmd 10_adapter_content.Rmd 11_kmer_content.Rmd _site.yml fastqc_report.Rmd fastqc_site.xml fastqc_site_render.R index.Rmd |
diffstat | 16 files changed, 805 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/01_evaluation_overview.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,95 @@ +--- +title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + +# Run FastQC + +```{bash eval=TRUE,echo=FALSE} +cd ${X_d} +cp ${X_r} ${X_d}/read_1.fq +cp ${X_R} ${X_d}/read_2.fq + +cat >temp.sh <<EOL +fastqc \\ + -q \\ + -c ${X_c} \\ + -l ${X_l} \\ + ${X_d}/read_1.fq > /dev/null 2>&1 + +fastqc \\ + -q \\ + -c ${X_c} \\ + -l ${X_l} \\ + ${X_d}/read_2.fq > /dev/null 2>&1 +EOL + +grep -v None temp.sh > fastqc.sh + +# run fastqc +sh fastqc.sh + +# unzip outputs +unzip -q read_1_fastqc.zip +unzip -q read_2_fastqc.zip +``` + +```{r} +# display fastqc job script +fastqc_sh = paste0(opt$X_d, '/fastqc.sh') +tags$code(tags$pre(readChar(fastqc_sh, file.info(fastqc_sh)$size ))) +``` + +# Raw FastQC reports + +## Before trimming +```{r eval=TRUE} +ori_html = tags$a(href = 'read_1_fastqc/fastqc_report.html', opt$X_n) +ori_fastqc_data = tags$a(href = 'read_1_fastqc/fastqc_data.txt', 'fastqc_data.txt') +ori_summary = tags$a(href = 'read_1_fastqc/summary.txt', 'summary.txt') +tags$ul( + tags$li(ori_html), + tags$li(ori_fastqc_data), + tags$li(ori_summary) + ) +``` + +## After trimming +```{r eval=TRUE} +ori_html = tags$a(href = 'read_2_fastqc/fastqc_report.html', opt$X_n) +ori_fastqc_data = tags$a(href = 'read_2_fastqc/fastqc_data.txt', 'fastqc_data.txt') +ori_summary = tags$a(href = 'read_2_fastqc/summary.txt', 'summary.txt') +tags$ul( + tags$li(ori_html), + tags$li(ori_fastqc_data), + tags$li(ori_summary) + ) +``` + + +# Fastqc Output Visualization + +## Overview + +```{r eval=TRUE} +read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 2:1] +read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 1] +combined_summary = cbind(read_1_summary, read_2_summary) +names(combined_summary) = c('MODULE', paste0(opt$X_n, '(before)'), paste0(opt$X_N, '(after)')) +combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)' +combined_summary[combined_summary == 'WARN'] = 'WARN (!)' +knitr::kable(combined_summary) +```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/02_per_base_sequence_quality.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,62 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + + +```{r 'function definition', echo=FALSE} +# Define a function to extract outputs for each module from fastqc output +extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") { + f = readLines(fastqc_data) + start_line = grep(module_name, f) + end_module_lines = grep('END_MODULE', f) + end_line = end_module_lines[which(end_module_lines > start_line)[1]] + module_data = f[(start_line+1):(end_line-1)] + writeLines(module_data, '/tmp/temp.txt') + read.csv('/tmp/temp.txt', sep = '\t', header = header, comment.char = comment.char) +} +``` + +# Per base sequence quality + +```{r 'per base sequence quality', fig.width=10} +## reads 1 +pbsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence quality') +pbsq_1$id = 1:length(pbsq_1$X.Base) +pbsq_1$trim = 'before' + +## reads 2 +pbsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence quality') +pbsq_2$id = 1:length(pbsq_2$X.Base) +pbsq_2$trim = 'after' + +comb_pbsq = rbind(pbsq_1, pbsq_2) +comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim) + +p = ggplot(data = comb_pbsq) + + geom_boxplot(mapping = aes(x = id, + lower = Lower.Quartile, + upper = Upper.Quartile, + middle = Median, + ymin = X10th.Percentile, + ymax = X90th.Percentile, + fill = "yellow"), + stat = 'identity') + + geom_line(mapping = aes(x = id, y = Mean, color = "red")) + + scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) + + scale_fill_identity() + + scale_color_identity() + + ylim(0, max(comb_pbsq$Upper.Quartile) + 5) + + xlab('Position in read (bp)') + + facet_grid(. ~ trim) + + theme(axis.text.x = element_text(angle=45)) +p + +```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/03_per_tile_sequence_quality.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,44 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + +# Per tile sequence quality + +```{r 'per tile sequence quality', fig.width=10} +## check if 'per tile sequence quality' module exits or not +check_ptsq = grep('Per tile sequence quality', readLines(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'))) +if (length(check_ptsq) > 0) { + ## reads 1 + ptsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per tile sequence quality') + ptsq_1$trim = 'before' + + ## reads 2 + ptsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per tile sequence quality') + ptsq_2$trim = 'after' + + comb_ptsq = rbind(ptsq_1, ptsq_2) + comb_ptsq$trim = factor(levels = c('before', 'after'), comb_ptsq$trim) + comb_ptsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base) + + # convert integers to charaters + comb_ptsq$Tile = as.character(comb_ptsq$X.Tile) + + p = ggplot(data = comb_ptsq, aes(x = Base, y = Tile, fill = Mean)) + + geom_raster() + + facet_grid(. ~ trim) + + xlab('Position in read (bp)') + + ylab('') + + theme(axis.text.x = element_text(angle=45)) + ggplotly(p) +} else { + print('No "per tile sequence quality" data') +} +``` \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/04_per_sequence_quality_score.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,35 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + + +# Per sequence quality score + +```{r 'Per sequence quality score', fig.width=10} +## reads 1 +psqs_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence quality scores') +psqs_1$trim = 'before' + +## reads 2 +psqs_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence quality scores') +psqs_2$trim = 'after' + +comb_psqs = rbind(psqs_1, psqs_2) +comb_psqs$trim = factor(levels = c('before', 'after'), comb_psqs$trim) + +p = ggplot(data = comb_psqs, aes(x = X.Quality, y = Count)) + + geom_line(color = 'red') + + facet_grid(. ~ trim) + + xlim(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality)) + + xlab('Mean Sequence Qaulity (Phred Score)') + + ylab('') +ggplotly(p) +```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/05_per_base_sequence_content.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,43 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + + +# Per base sequence content + +```{r 'Per base sequence content', fig.width=10} +## reads 1 +pbsc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence content') +pbsc_1$id = 1:length(pbsc_1$X.Base) + +melt_pbsc_1 = melt(pbsc_1, id=c('X.Base', 'id')) +melt_pbsc_1$trim = 'before' + + +## reads 2 +pbsc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence content') +pbsc_2$id = 1:length(pbsc_2$X.Base) + +melt_pbsc_2 = melt(pbsc_2, id=c('X.Base', 'id')) +melt_pbsc_2$trim = 'after' + +comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2) +comb_pbsc$trim = factor(levels = c('before', 'after'), comb_pbsc$trim) + +p = ggplot(data = comb_pbsc, aes(x = id, y = value, color = variable)) + + geom_line() + + facet_grid(. ~ trim) + + xlim(min(comb_pbsc$id), max(comb_pbsc$id)) + + ylim(0, 100) + + xlab('Position in read (bp)') + + ylab('') +ggplotly(p) +``` \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/06_per_sequence_gc_content.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,33 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + +# Per sequence GC content + +```{r 'Per sequence GC content', fig.width=10} +## reads 1 +psGCc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence GC content') +psGCc_1$trim = 'before' + +## reads 2 +psGCc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence GC content') +psGCc_2$trim = 'after' + +comb_psGCc = rbind(psGCc_1, psGCc_2) +comb_psGCc$trim = factor(levels = c('before', 'after'), comb_psGCc$trim) + +p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) + + geom_line(color = 'red') + + facet_grid(. ~ trim) + + xlab('Mean Sequence Qaulity (Phred Score)') + + ylab('') +ggplotly(p) +``` \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/07_per_base_n_content.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,38 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + +# Per base N content + +```{r 'Per base N content', fig.width=10} +## reads 1 +pbNc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base N content') +pbNc_1$id = 1:length(pbNc_1$X.Base) +pbNc_1$trim = 'before' + +## reads 2 +pbNc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base N content') +pbNc_2$id = 1:length(pbNc_2$X.Base) +pbNc_2$trim = 'after' + +comb_pbNc = rbind(pbNc_1, pbNc_2) +comb_pbNc$trim = factor(levels = c('before', 'after'), comb_pbNc$trim) + +p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) + + geom_line(color = 'red') + + scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) + + facet_grid(. ~ trim) + + ylim(0, 1) + + xlab('N-Count') + + ylab('') + + theme(axis.text.x = element_text(angle=45)) +ggplotly(p) +``` \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/08_sequence_length_distribution.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,37 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + +# Sequence Length Distribution + +```{r 'Sequence Length Distribution', fig.width=10} +## reads 1 +sld_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Length Distribution') +sld_1$id = 1:length(sld_1$X.Length) +sld_1$trim = 'before' + +## reads 2 +sld_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Length Distribution') +sld_2$id = 1:length(sld_2$X.Length) +sld_2$trim = 'after' + +comb_sld = rbind(sld_1, sld_2) +comb_sld$trim = factor(levels = c('before', 'after'), comb_sld$trim) + +p = ggplot(data = comb_sld, aes(x = id, y = Count)) + + geom_line(color = 'red') + + scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) + + facet_grid(. ~ trim) + + xlab('Sequence Length (bp)') + + ylab('') + + theme(axis.text.x = element_text(angle=45)) +ggplotly(p) +``` \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/09_sequence_duplication_levels.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,45 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + + +# Sequence Duplication Levels + +```{r 'Sequence Duplication Levels', fig.width=10} +## reads 1 +sdl_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#') +names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total') +sdl_1$id = 1:length(sdl_1$Duplication_Level) + +melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id')) +melt_sdl_1$trim = 'before' + + +## reads 2 +sdl_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#') +names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total') +sdl_2$id = 1:length(sdl_2$Duplication_Level) + +melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id')) +melt_sdl_2$trim = 'after' + +comb_sdl = rbind(melt_sdl_1, melt_sdl_2) +comb_sdl$trim = factor(levels = c('before', 'after'), comb_sdl$trim) + +p = ggplot(data = comb_sdl, aes(x = id, y = value, color = variable)) + + geom_line() + + scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) + + facet_grid(. ~ trim) + + xlab('Sequence Duplication Level') + + ylab('') + + theme(axis.text.x = element_text(angle=45)) +ggplotly(p) +``` \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/10_adapter_content.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,41 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + +# Adapter Content + +```{r 'Adapter Content', fig.width=10} +## reads 1 +ac_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Adapter Content') +ac_1$id = 1:length(ac_1$X.Position) + +melt_ac_1 = melt(ac_1, id=c('X.Position', 'id')) +melt_ac_1$trim = 'before' + +## reads 2 +ac_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Adapter Content') +ac_2$id = 1:length(ac_2$X.Position) + +melt_ac_2 = melt(ac_2, id=c('X.Position', 'id')) +melt_ac_2$trim = 'after' + +comb_ac = rbind(melt_ac_1, melt_ac_2) +comb_ac$trim = factor(levels = c('before', 'after'), comb_ac$trim) + +p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) + + geom_line() + + facet_grid(. ~ trim) + + xlim(min(comb_ac$id), max(comb_ac$id)) + + ylim(0, 1) + + xlab('Position in read (bp)') + + ylab('') +ggplotly(p) +``` \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/11_kmer_content.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,26 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + +# Kmer Content {.tabset} + +## Before + +```{r 'Kmer Content (before)', fig.width=10} +kc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Kmer Content') +knitr::kable(kc_1) +``` + +## After +```{r 'Kmer Content (after)', fig.width=10} +kc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Kmer Content') +knitr::kable(kc_2) +```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/_site.yml Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,37 @@ +name: "FastQC Website" +output_dir: "../_site" +navbar: + title: "FastQC" + type: inverse + left: + - text: "Home" + icon: fa-home + href: index.html + - text: "Evaluation Overview" + href: 01_evaluation_overview.html + - text: "Evaluation by data module" + menu: + - text: "Per Base Sequence Quality" + href: 02_per_base_sequence_quality.html + - text: "Per Tile Sequence Quality" + href: 03_per_tile_sequence_quality.html + - text: "Per Sequence Quality Score" + href: 04_per_sequence_quality_score.html + - text: "Per Base Sequence Content" + href: 05_per_base_sequence_content.html + - text: "Per Sequence GC Content" + href: 06_per_sequence_gc_content.html + - text: "Per Base N Content" + href: 07_per_base_n_content.html + - text: "Sequence Length Distribution" + href: 08_sequence_length_distribution.html + - text: "Sequence Duplication Levels" + href: 09_sequence_duplication_levels.html + - text: "Adapter Content" + href: 10_adapter_content.html + - text: "Kmer Content" + href: 11_kmer_content.html +output: + html_document: + theme: cosmo + highlight: textmate \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastqc_report.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,69 @@ +--- +title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + + + + + +## + +## + + +## + + +## + +## + +## + +### Kmer Content {.tabset} + +#### Before + +```{r 'Kmer Content (before)', fig.width=10} +kc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Kmer Content') +knitr::kable(kc_1) +``` + +#### After +```{r 'Kmer Content (after)', fig.width=10} +kc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Kmer Content') +knitr::kable(kc_2) +``` + + +# Session Info + +```{r 'session info'} +sessionInfo() +``` + +# References + +* Bioinformatics, Babraham (2014). FastQC. + +* Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6. + +* Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29. + +* Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy (2017). plotly: Create Interactive Web Graphics via 'plotly.js'. R package version 4.6.0. [Link] + +* Wickham, H. (2016). ggplot2: elegant graphics for data analysis. Springer. Chicago
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastqc_site.xml Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,109 @@ +<tool name="Fastqc site" id='aurora_fastqc_site' version="2.1.0"> + <description> + Evaluate short reads with FastQC software on a single or a paired of untrimmed and trimmed reads + files. + </description> + <requirements> + <requirement type="package" version="1.15.0.6-0">pandoc</requirement> + <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="1.3">r-rmarkdown</requirement> + <requirement type="package" version="1.8.4">r-plyr</requirement> + <requirement type="package" version="1.1.0">r-stringr</requirement> + <requirement type="package" version="0.2">r-dt</requirement> + <requirement type="package" version="1.4.2">r-reshape2</requirement> + <requirement type="package" version="4.5.6">r-plotly</requirement> + <requirement type="package" version="0.3.5">r-htmltools</requirement> + <requirement type="package" version="0.11.5">fastqc</requirement> + <requirement type="package" version="0.9.10">xorg-libxrender</requirement> + <requirement type="package" version="1.2.2">xorg-libsm</requirement> + <requirement type="package" version="6.0">unzip</requirement> + </requirements> + <stdio> + <regex match="XXX" source="stderr" level="warning" + description="Check the warnings_and_errors.txt file for more details."/> + </stdio> + <command><![CDATA[ + + + Rscript '${__tool_directory__}/fastqc_site_render.R' + -e $echo + -r $reads_1 + -n '$reads_1.name' + -R $reads_2 + -N '$reads_2.name' + -c $contaminants + -l $limits + + -o $report + -d $report.files_path + -s $sink_message + + -t '${__tool_directory__}' + + ]]></command> + <inputs> + <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_1" type="data" optional="false" + label="Short reads before trimming" + help="Short reads data from history. This could be reads before trimming."/> + <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_2" type="data" label="Short reads after trimming" + help="Short reads data from history. This could be reads after trimming."/> + <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list" + help="Specifies a non-default file which contains the list of adapter sequences which will be explicitly searched against the library. The file must contain sets of named adapters in the form name[tab]sequence. Lines prefixed with a hash will be ignored."/> + <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file" + help="Specifies a non-default file which contains a set of criteria which will be used to determine the warn/error limits for the various modules. This file can also be used to selectively remove some modules from the output all together. The format needs to mirror the default limits.txt file found in the Configuration folder."/> + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" + label="Display analysis code in report?"/> + </inputs> + <outputs> + <data format="html" name="report" label="fastqc site"/> + <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> + </outputs> + <citations> + <citation type="bibtex"><![CDATA[ + @misc{bioinformatics2014fastqc, + title={FastQC}, + author={Bioinformatics, Babraham}, + year={2014} + } + ]]></citation> + <citation type="bibtex"><![CDATA[ + @article{allaire2016rmarkdown, + title={rmarkdown: Dynamic Documents for R, 2016}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff + and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + journal={R package version 0.9}, + volume={6}, + year={2016} + } + ]]></citation> + <citation type="bibtex"><![CDATA[ + @book{xie2015dynamic, + title={Dynamic Documents with R and knitr}, + author={Xie, Yihui}, + volume={29}, + year={2015}, + publisher={CRC Press} + } + ]]></citation> + <citation type="bibtex"><![CDATA[ + @misc{plotly2017, + title = {plotly: Create Interactive Web Graphics via 'plotly.js'}, + author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and + Marianne Corvellec and Pedro Despouy}, + year = {2017}, + note = {R package version 4.6.0}, + url = {https://CRAN.R-project.org/package=plotly}, + } + ]]></citation> + <citation type="bibtex"><![CDATA[ + @Book{ggplot22016, + author = {Hadley Wickham}, + title = {ggplot2: Elegant Graphics for Data Analysis}, + publisher = {Springer-Verlag New York}, + year = {2009}, + isbn = {978-0-387-98140-6}, + url = {http://ggplot2.org}, + } + ]]></citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastqc_site_render.R Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,69 @@ +##============ Sink warnings and errors to a file ============== +## use the sink() function to wrap all code within it. +##============================================================== +zz = file('warnings_and_errors.txt') +sink(zz) +sink(zz, type = 'message') + +library(getopt) +library(rmarkdown) +library(htmltools) +library(plyr) +library(dplyr) +library(stringr) +library(DT) +library(reshape2) +library(plotly) +options(stringsAsFactors = FALSE) + +# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>% +# write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE) + + +# get arguments into R +spec_matrix = as.matrix( + data.frame(stringsAsFactors=FALSE, + long_flags = c("X_e", "X_r", "X_n", "X_R", "X_N", "X_c", "X_l", + "X_o", "X_d", "X_s", "X_t"), + short_flags = c("e", "r", "n", "R", "N", "c", "l", "o", "d", "s", + "t"), + argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), + data_type_flags = c("character", "character", "character", "character", + "character", "character", "character", + "character", "character", "character", "character") + ) +) +opt = getopt(spec_matrix) + +# using passed arguments in R to define system environment variables +do.call(Sys.setenv, opt[-1]) + +# create the output associated directory to store all outputs +dir.create(opt$X_d, recursive = TRUE) + +#-----------------render site-------------- +# copy site generating materials into folder "_site" within the output associated directory +file.copy(opt$X_t, opt$X_d, recursive = TRUE) +# render site to the output associated directory +render_site(input = paste0(opt$X_d, '/aurora_fastqc_site')) +# remove site generating materials from output associated directory +unlink(paste0(opt$X_d, '/aurora_fastqc_site'), recursive = TRUE) +# move _site/* into output associated directory +move_cmd = paste0('mv ', opt$X_d, '/_site/* ', opt$X_d) +system(move_cmd) +#------------------------------------------ + +#-----link index.html to output----- +cp_index = paste0('cp ', opt$X_d, '/index.html ', opt$X_o) +system(cp_index) +#----------------------------------- + +#==============the end============== + + + +# file.copy(paste0(opt$X_d, '/index.html'), opt$X_o, recursive = TRUE) + +##--------end of code rendering .Rmd templates---------------- +sink() +##=========== End of sinking output============================= \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/index.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,22 @@ +--- +title: "FastQC Report" +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set(echo = TRUE, error = TRUE) +``` + + + +## References + +* Bioinformatics, Babraham (2014). FastQC. + +* Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6. + +* Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29. + +* Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy (2017). plotly: Create Interactive Web Graphics via 'plotly.js'. R package version 4.6.0. [Link] + +* Wickham, H. (2016). ggplot2: elegant graphics for data analysis. Springer. Chicago