Mercurial > repos > mingchen0919 > rmarkdown_fastqc_site
changeset 0:d732d4526c6d draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
author | mingchen0919 |
---|---|
date | Tue, 08 Aug 2017 10:14:46 -0400 |
parents | |
children | 1fea15ac7532 |
files | 01_evaluation_overview.Rmd 02_fastqc_original_reports.Rmd 1_per_base_quality_scores.Rmd 2_per_base_N_content.Rmd 3_per_sequence_quality_scores.Rmd 4_per_sequence_GC_content.Rmd 5_per_base_sequence_content.Rmd _site.yml fastqc_site.xml fastqc_site_render.R index.Rmd |
diffstat | 11 files changed, 766 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/01_evaluation_overview.Rmd Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,123 @@
+---
+title: "Evaluation Overview"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)
+```
+
+```{bash 'copy data from datasets directory to working directory', echo=FALSE}
+# Copy the uploaded data files (a comma-separated list of paths) to the
+# working directory. Splitting on commas only and quoting every path keeps
+# paths that contain whitespace intact.
+IFS=',' read -r -a input_files <<< "READS"
+for f in "${input_files[@]}"
+do
+  cp "$f" ./
+done
+```
+
+```{bash 'run fastqc', echo=FALSE}
+# Run fastqc on every data file and place the outputs into the report directory.
+for r in *.dat
+do
+  [ -e "$r" ] || continue  # the glob matched nothing
+  fastqc -o "REPORT_OUTPUT_DIR" "$r" > /dev/null 2>&1
+done
+```
+
+```{bash 'parse fastqc results', echo=FALSE}
+##==== copy fastqc generated zip files from report output directory to job work directory ==
+cp -r "REPORT_OUTPUT_DIR"/*zip ./
+
+# Create one index file per result table. Each index maps a sample id to the
+# file holding that sample's extracted table.
+#   PWF  Pass, Warning, Fail           PBQS Per Base Quality Score
+#   PSQS Per Sequence Quality Score    PSGC Per Sequence GC Content
+#   PBSC Per Base Sequence Content     PBNC Per Base N Content
+#   SDL  Sequence Duplication Level    SLD  Sequence Length Distribution
+#   KMC  Kmer Content
+for table in PWF PBQS PSQS PSGC PBSC PBNC SDL SLD KMC
+do
+  echo "sample_id,file_path" > "${table}_file_paths.txt"
+done
+
+# extract_module SAMPLE TITLE SUFFIX
+# Write the lines between ">>TITLE" and the next END_MODULE marker of
+# SAMPLE/fastqc_data.txt to SAMPLE-SUFFIX.txt, then register that file in
+# SUFFIX_file_paths.txt.
+extract_module() {
+  awk -v start="^>>$2" '$0 ~ start {flag=1; next} /END_MODULE/ {flag=0} flag' \
+    "$1"/fastqc_data.txt > "$1-$3.txt"
+  echo "$1,$1-$3.txt" >> "${3}_file_paths.txt"
+}
+
+for i in *.zip
+do
+  [ -e "$i" ] || continue  # the glob matched nothing
+  BASE="${i%.zip}"
+  echo "$BASE"  # printed into the chunk output
+  unzip "${BASE}.zip" > /dev/null 2>&1
+
+  ##====== pass,warning,fail (PWF) =============
+  # Keep every module header line (">>name<TAB>status"), drop the
+  # END_MODULE markers and strip the leading ">>".
+  awk '/^>>/ {print}' "$BASE"/fastqc_data.txt | grep -v 'END_MODULE' | sed 's/>>//' > "$BASE"-PWF.txt
+  echo "${BASE},${BASE}-PWF.txt" >> PWF_file_paths.txt
+
+  ##====== one table per module ================
+  extract_module "$BASE" "Per base sequence quality"    PBQS
+  extract_module "$BASE" "Per sequence quality scores"  PSQS
+  extract_module "$BASE" "Per sequence GC content"      PSGC
+  extract_module "$BASE" "Per base sequence content"    PBSC
+  extract_module "$BASE" "Per base N content"           PBNC
+  extract_module "$BASE" "Sequence Duplication Levels"  SDL
+  extract_module "$BASE" "Sequence Length Distribution" SLD
+  extract_module "$BASE" "Kmer Content"                 KMC
+
+done
+```
+
+
+## Evaluation Overview
+
+```{r 'overview'}
+# Build one wide table: the first column holds the module name, followed by
+# one pass/warn/fail column per sample.
+PWF_file_paths <- read.csv("PWF_file_paths.txt",
+                           header = TRUE, stringsAsFactors = FALSE)
+PWF_df <- NULL
+for (i in seq_len(nrow(PWF_file_paths))) {
+  pwf_df <- read.csv(PWF_file_paths[i, 2],
+                     sep = "\t", header = FALSE, stringsAsFactors = FALSE)
+  colnames(pwf_df) <- c("item", PWF_file_paths[i, 1])
+  if (is.null(PWF_df)) {
+    # The first sample contributes the module names as well as its status column.
+    PWF_df <- pwf_df
+  } else {
+    PWF_df <- cbind(PWF_df, pwf_df[, 2, drop = FALSE])
+  }
+}
+```
+
+
+```{r}
+# Colour every status cell: pass, fail, and anything else get distinct
+# background colours.
+status_formatter <- formatter(
+  "span",
+  style = x ~ style(
+    "background-color" = ifelse(x == "pass", "#9CD027",
+                                ifelse(x == "fail", "#CC0000", "#FF4E00")),
+    "color" = "white",
+    "width" = "50px",
+    "float" = "left",
+    "padding-right" = "5px"
+  )
+)
+
+# Apply the same formatter to every sample column (all but the first).
+sample_columns <- colnames(PWF_df)[-1]
+evaluate_list <- setNames(rep(list(status_formatter), length(sample_columns)),
+                          sample_columns)
+
+formattable(PWF_df, evaluate_list)
+```
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/02_fastqc_original_reports.Rmd Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,20 @@
+---
+title: "FastQC original reports"
+output: html_document
+---
+
+```{r 'FastQC original reports', include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)
+```
+
+
+Below are links to ***Fastqc*** original html reports.
+
+```{r 'html report links'}
+# List the html reports in the report directory. The pattern is anchored so
+# that only names ending in ".html" match, not any name containing "html".
+html_files <- list.files("REPORT_OUTPUT_DIR", pattern = "\\.html$")
+
+# One list item per report; the link target and the link text are both the
+# file name.
+html_report_list <- setNames(
+  lapply(html_files, function(html_file) {
+    tags$li(tags$a(href = html_file, html_file))
+  }),
+  html_files
+)
+tags$ul(html_report_list)
+```
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/1_per_base_quality_scores.Rmd Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,62 @@
+---
+title: "Per Base Quality Scores"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)
+```
+
+
+## Per Base Quality Scores
+
+```{r}
+# Turn the `X.Base` column, whose values are either a single position or a
+# "start-end" range, into a numeric `Base` column. A range row is emitted
+# twice, once at its start and once at its end position; the result is
+# sorted by `Base`.
+expand_base_ranges <- function(df) {
+  bounds <- str_split_fixed(df$X.Base, "-", 2)
+  df <- mutate(df,
+               Base1 = as.numeric(bounds[, 1]),
+               Base2 = as.numeric(bounds[, 2]))
+  range_starts <- select(df, -Base2)
+  # Single positions have an empty second bound, which as.numeric() turns
+  # into NA; only real range ends are kept.
+  range_ends <- select(df, -Base1) %>% filter(!is.na(Base2))
+  # The position column is the last one in both tables.
+  colnames(range_starts)[ncol(range_starts)] <- "Base"
+  colnames(range_ends)[ncol(range_ends)] <- "Base"
+  rbind(range_starts, range_ends) %>% arrange(Base)
+}
+
+PBQS_file_paths <- read.csv("PBQS_file_paths.txt",
+                            header = TRUE, stringsAsFactors = FALSE)
+
+# Read every sample's table, tag it with its sample id and stack the results.
+PBQS_df <- do.call(rbind, lapply(seq_len(nrow(PBQS_file_paths)), function(i) {
+  pbqs_df <- read.csv(PBQS_file_paths[i, 2],
+                      sep = "\t", header = TRUE, stringsAsFactors = FALSE) %>%
+    expand_base_ranges()
+  pbqs_df$sample_id <- rep(PBQS_file_paths[i, 1], nrow(pbqs_df))
+  pbqs_df
+}))
+```
+
+
+```{r}
+# Leave some headroom above the highest mean score on the y axis.
+max_phred <- max(PBQS_df$Mean) + 10
+hchart(PBQS_df, "line", hcaes(x = Base, y = Mean, group = sample_id)) %>%
+  hc_title(
+    text = "Per Base Quality Score"
+  ) %>%
+  hc_yAxis(
+    title = list(text = "Mean Base Quality Score"),
+    min = 0,
+    max = max_phred,
+    # Reference lines at Phred 27 (dashed green) and Phred 20 (red).
+    plotLines = list(
+      list(label = list(text = "Phred Score = 27"),
+           width = 2,
+           dashStyle = "dash",
+           color = "green",
+           value = 27),
+      list(label = list(text = "Phred Score = 20"),
+           width = 2,
+           color = "red",
+           value = 20)
+    )
+  ) %>%
+  hc_exporting(enabled = TRUE)
+```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/2_per_base_N_content.Rmd Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,58 @@
+---
+title: "Per Base N Content"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)
+```
+
+## Per Base N Content
+
+```{r}
+# Turn the `X.Base` column, whose values are either a single position or a
+# "start-end" range, into a numeric `Base` column. A range row is emitted
+# twice, once at its start and once at its end position; the result is
+# sorted by `Base`.
+expand_base_ranges <- function(df) {
+  bounds <- str_split_fixed(df$X.Base, "-", 2)
+  df <- mutate(df,
+               Base1 = as.numeric(bounds[, 1]),
+               Base2 = as.numeric(bounds[, 2]))
+  range_starts <- select(df, -Base2)
+  # Single positions have an empty second bound, which as.numeric() turns
+  # into NA; only real range ends are kept.
+  range_ends <- select(df, -Base1) %>% filter(!is.na(Base2))
+  # The position column is the last one in both tables.
+  colnames(range_starts)[ncol(range_starts)] <- "Base"
+  colnames(range_ends)[ncol(range_ends)] <- "Base"
+  rbind(range_starts, range_ends) %>% arrange(Base)
+}
+
+PBNC_file_paths <- read.csv("PBNC_file_paths.txt",
+                            header = TRUE, stringsAsFactors = FALSE)
+
+# Read every sample's table, tag it with its sample id and stack the results.
+PBNC_df <- do.call(rbind, lapply(seq_len(nrow(PBNC_file_paths)), function(i) {
+  pbnc_df <- read.csv(PBNC_file_paths[i, 2],
+                      sep = "\t", header = TRUE, stringsAsFactors = FALSE) %>%
+    expand_base_ranges()
+  pbnc_df$sample_id <- rep(PBNC_file_paths[i, 1], nrow(pbnc_df))
+  pbnc_df
+}))
+```
+
+
+```{r}
+# NOTE(review): the scaling below assumes N.Count is stored as a fraction.
+# Confirm against fastqc_data.txt; if the column is already a percentage,
+# this inflates the plotted values by a factor of 100.
+PBNC_df$N.Count <- PBNC_df$N.Count * 100
+hchart(PBNC_df, "line", hcaes(x = as.character(Base), y = N.Count, group = sample_id)) %>%
+  hc_title(
+    text = "Per Base N Content"
+  ) %>%
+  hc_xAxis(
+    title = list(text = "Base Position")
+  ) %>%
+  hc_yAxis(
+    title = list(text = "N %"),
+    # Dashed red reference line at N = 5%.
+    plotLines = list(
+      list(label = list(text = "N = 5%"),
+           width = 2,
+           dashStyle = "dash",
+           color = "red",
+           value = 5)
+    )
+  ) %>%
+  hc_exporting(enabled = TRUE)
+```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/3_per_sequence_quality_scores.Rmd Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,50 @@
+---
+title: "Per Sequence Quality Scores"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)
+```
+
+## Per Sequence Quality Scores
+
+```{r}
+PSQS_file_paths <- read.csv("PSQS_file_paths.txt",
+                            header = TRUE, stringsAsFactors = FALSE)
+
+# Read every sample's table, tag it with its sample id and stack the results.
+PSQS_df <- do.call(rbind, lapply(seq_len(nrow(PSQS_file_paths)), function(i) {
+  psqs_df <- read.csv(PSQS_file_paths[i, 2],
+                      sep = "\t", header = TRUE, stringsAsFactors = FALSE)
+  psqs_df$sample_id <- rep(PSQS_file_paths[i, 1], nrow(psqs_df))
+  psqs_df
+}))
+```
+
+
+```{r}
+# Leave some headroom to the right of the highest quality score on the x axis.
+max_phred <- max(PSQS_df$X.Quality) + 5
+hchart(PSQS_df, "line", hcaes(x = X.Quality, y = Count, group = sample_id)) %>%
+  hc_title(
+    text = "Per Sequence Quality Score"
+  ) %>%
+  hc_xAxis(
+    title = list(text = "Mean Sequence Quality Score"),
+    min = 0,
+    max = max_phred,
+    # Reference lines at Phred 27 (dashed green) and Phred 20 (red).
+    plotLines = list(
+      list(label = list(text = "Phred Score = 27"),
+           width = 2,
+           dashStyle = "dash",
+           color = "green",
+           value = 27),
+      list(label = list(text = "Phred Score = 20"),
+           width = 2,
+           color = "red",
+           value = 20)
+    )
+  ) %>%
+  hc_exporting(enabled = TRUE)
+```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/4_per_sequence_GC_content.Rmd Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,38 @@
+---
+title: "Per Sequence GC Content"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)
+```
+
+## Per Sequence GC Content
+
+
+```{r}
+PSGC_file_paths <- read.csv("PSGC_file_paths.txt",
+                            header = TRUE, stringsAsFactors = FALSE)
+
+# Read every sample's table, tag it with its sample id and stack the results.
+PSGC_df <- do.call(rbind, lapply(seq_len(nrow(PSGC_file_paths)), function(i) {
+  psgc_df <- read.csv(PSGC_file_paths[i, 2],
+                      sep = "\t", header = TRUE, stringsAsFactors = FALSE)
+  psgc_df$sample_id <- rep(PSGC_file_paths[i, 1], nrow(psgc_df))
+  psgc_df
+}))
+```
+
+
+```{r}
+# One line per sample: sequence count against GC content.
+hchart(PSGC_df, "line", hcaes(x = X.GC.Content, y = Count, group = sample_id)) %>%
+  hc_title(
+    text = "Per Sequence GC Content"
+  ) %>%
+  hc_xAxis(
+    title = list(text = "% GC")
+  ) %>%
+  hc_exporting(enabled = TRUE)
+```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/5_per_base_sequence_content.Rmd Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,45 @@
+---
+title: "Per Base Sequence Content"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = ECHO)
+```
+
+## Per Base Sequence Content
+
+```{r}
+# Turn the `X.Base` column, whose values are either a single position or a
+# "start-end" range, into a numeric `Base` column. A range row is emitted
+# twice, once at its start and once at its end position; the result is
+# sorted by `Base`.
+expand_base_ranges <- function(df) {
+  bounds <- str_split_fixed(df$X.Base, "-", 2)
+  df <- mutate(df,
+               Base1 = as.numeric(bounds[, 1]),
+               Base2 = as.numeric(bounds[, 2]))
+  range_starts <- select(df, -Base2)
+  # Single positions have an empty second bound, which as.numeric() turns
+  # into NA; only real range ends are kept.
+  range_ends <- select(df, -Base1) %>% filter(!is.na(Base2))
+  # The position column is the last one in both tables.
+  colnames(range_starts)[ncol(range_starts)] <- "Base"
+  colnames(range_ends)[ncol(range_ends)] <- "Base"
+  rbind(range_starts, range_ends) %>% arrange(Base)
+}
+
+PBSC_file_paths <- read.csv("PBSC_file_paths.txt",
+                            header = TRUE, stringsAsFactors = FALSE)
+
+# Read every sample's table, tag it with its sample id and stack the results.
+PBSC_df <- do.call(rbind, lapply(seq_len(nrow(PBSC_file_paths)), function(i) {
+  pbsc_df <- read.csv(PBSC_file_paths[i, 2],
+                      sep = "\t", header = TRUE, stringsAsFactors = FALSE) %>%
+    expand_base_ranges()
+  pbsc_df$sample_id <- rep(PBSC_file_paths[i, 1], nrow(pbsc_df))
+  pbsc_df
+}))
+```
+
+
+```{r out.width="100%"}
+# Reshape to long form: one row per (position, sample, measured column).
+PBSC_df_2 <- select(PBSC_df, -X.Base) %>%
+  melt(id.vars = c("Base", "sample_id"), value.name = "base_percentage")
+
+# One panel per sample, one coloured line per measured column.
+p <- ggplot(data = PBSC_df_2,
+            aes(x = Base, y = base_percentage, group = variable, color = variable)) +
+  geom_line() +
+  facet_wrap(~ sample_id)
+ggplotly(p)
+```
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/_site.yml Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,29 @@
+name: "FastQC Website"
+output_dir: "my_site"  # fastqc_site_render.R copies the rendered pages out of this directory
+navbar:
+  title: "FastQC"
+  type: inverse
+  left:
+    - text: "Home"
+      icon: fa-home
+      href: index.html
+    - text: "Evaluation Overview"
+      href: 01_evaluation_overview.html  # rendered from 01_evaluation_overview.Rmd
+    - text: "Evaluation Items"
+      menu:  # one entry per numbered per-module page
+        - text: "Per Base Quality Scores"
+          href: 1_per_base_quality_scores.html
+        - text: "Per Base N Content"
+          href: 2_per_base_N_content.html
+        - text: "Per Sequence Quality Scores"
+          href: 3_per_sequence_quality_scores.html
+        - text: "Per Sequence GC Content"
+          href: 4_per_sequence_GC_content.html
+        - text: "Per Base Sequence Content"
+          href: 5_per_base_sequence_content.html
+    - text: "Original FastQC Reports"  # NOTE(review): nesting level reconstructed; confirm this is a top-level item
+      href: 02_fastqc_original_reports.html
+output:
+  html_document:
+    theme: cosmo
+    highlight: textmate
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqc_site.xml Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,125 @@
+<!-- Galaxy tool wrapper: passes the selected datasets and the bundled Rmd
+     templates to fastqc_site_render.R, which builds the report site. -->
+<tool id="fastqc_site" name="Fastqc Site" version="1.0.0">
+    <!-- NOTE(review): fastqc_site_render.R also calls library(dplyr) and
+         library(Kmisc), which are not declared below, while
+         bioconductor-deseq2 does not appear to be used by the script or the
+         templates in this changeset. Confirm and adjust the requirements. -->
+    <requirements>
+        <requirement type="package" version="1.15.2">pandoc</requirement>
+        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.1.0">r-stringr</requirement>
+        <requirement type="package" version="0.5.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="1.4.2">r-reshape2</requirement>
+        <requirement type="package" version="4.5.6">r-plotly</requirement>
+        <requirement type="package" version="0.2.0.1">r-formattable</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="0.11.5">fastqc</requirement>
+    </requirements>
+    <description>
+        Implements FastQC analysis and display results in R Markdown website.
+    </description>
+    <!-- Treat any of these messages on stdout or stderr as a fatal job error. -->
+    <stdio>
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+
+        Rscript '${__tool_directory__}/fastqc_site_render.R'
+
+            ## 1. input data (values are quoted so the shell passes each one
+            ##    through as a single argument)
+            -r '$reads'
+            -e '$echo'
+
+            ## 2. output report and report site directory
+            -o '$fastqc_site'
+            -d '$fastqc_site.files_path'
+
+            ## 3. Rmd templates sitting in the tool directory
+
+                ## _site.yml and index.Rmd template files
+                -s '${__tool_directory__}/_site.yml'
+                -i '${__tool_directory__}/index.Rmd'
+
+                ## other Rmd body template files
+                -p '${__tool_directory__}/01_evaluation_overview.Rmd'
+                -a '${__tool_directory__}/02_fastqc_original_reports.Rmd'
+                -b '${__tool_directory__}/1_per_base_quality_scores.Rmd'
+                -c '${__tool_directory__}/2_per_base_N_content.Rmd'
+                -f '${__tool_directory__}/3_per_sequence_quality_scores.Rmd'
+                -g '${__tool_directory__}/4_per_sequence_GC_content.Rmd'
+                -h '${__tool_directory__}/5_per_base_sequence_content.Rmd'
+
+        ]]>
+    </command>
+    <inputs>
+        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" multiple="true" name="reads" type="data" label="Short reads data from history" />
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data format="html" name="fastqc_site" label="fastqc site" />
+    </outputs>
+    <citations>
+        <citation type="bibtex">
+            @misc{bioinformatics2014fastqc,
+                title={FastQC},
+                author={Bioinformatics, Babraham},
+                year={2014}
+            }
+        </citation>
+        <citation type="bibtex">
+            @article{allaire2016rmarkdown,
+                title={rmarkdown: Dynamic Documents for R, 2016},
+                author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+                journal={R package version 0.9},
+                volume={6},
+                year={2016}
+            }
+        </citation>
+        <citation type="bibtex">
+            @book{xie2015dynamic,
+                title={Dynamic Documents with R and knitr},
+                author={Xie, Yihui},
+                volume={29},
+                year={2015},
+                publisher={CRC Press}
+            }
+        </citation>
+        <citation type="bibtex">
+            @Manual{,
+                title = {plotly: Create Interactive Web Graphics via 'plotly.js'},
+                author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy},
+                year = {2017},
+                note = {R package version 4.6.0},
+                url = {https://CRAN.R-project.org/package=plotly},
+            }
+        </citation>
+        <citation type="bibtex">
+            @Manual{,
+                title = {highcharter: A Wrapper for the 'Highcharts' Library},
+                author = {Joshua Kunst},
+                year = {2017},
+                note = {R package version 0.5.0},
+                url = {https://CRAN.R-project.org/package=highcharter},
+            }
+        </citation>
+        <citation type="bibtex">
+            @Manual{,
+                title = {formattable: Create 'Formattable' Data Structures},
+                author = {Kun Ren and Kenton Russell},
+                year = {2016},
+                note = {R package version 0.2.0.1},
+                url = {https://CRAN.R-project.org/package=formattable},
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqc_site_render.R Tue Aug 08 10:14:46 2017 -0400
@@ -0,0 +1,196 @@
+##======= Handle arguments from command line ========
+# Set up R error handling: write the error message to stderr and exit with
+# status 1.
+options(
+  show.error.messages = FALSE,
+  error = function() {
+    cat(geterrmessage(), file = stderr())
+    quit("no", 1, FALSE)
+  }
+)
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warnings
+options(warn = -1)
+
+options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: data type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list <- list()
+
+##------- 1. input data ---------------------
+spec_list$READS <- c("reads", "r", "1", "character")
+spec_list$ECHO <- c("echo", "e", "1", "character")
+
+##--------2. output report and report site directory --------------
+spec_list$FASTQC_SITE <- c("fastqc_site", "o", "1", "character")
+spec_list$FASTQC_SITE_DIR <- c("fastqc_site_dir", "d", "1", "character")
+
+##--------3. Rmd templates sitting in the tool directory ----------
+
+## _site.yml and index.Rmd files
+spec_list$SITE_YML <- c("site_yml", "s", "1", "character")
+spec_list$INDEX_Rmd <- c("index_rmd", "i", "1", "character")
+
+## other Rmd body template files
+spec_list$x01 <- c("x01_evaluation_overview", "p", "1", "character")
+spec_list$x02 <- c("x02_fastqc_original_reports", "a", "1", "character")
+spec_list$x1 <- c("x1_per_base_quality_scores", "b", "1", "character")
+spec_list$x2 <- c("x2_per_base_N_content", "c", "1", "character")
+spec_list$x3 <- c("x3_per_sequence_quality_scores", "f", "1", "character")
+spec_list$x4 <- c("x4_per_sequence_GC_content", "g", "1", "character")
+spec_list$x5 <- c("x5_per_base_sequence_content", "h", "1", "character")
+
+##------------------------------------------------------------------
+
+spec <- t(as.data.frame(spec_list))
+opt <- getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+# NOT by element name in the spec_list
+# example: opt$reads, opt$fastqc_site_dir
+
+# Every flag is needed below, so fail early with a clear message instead of
+# an obscure error further down.
+missing_opts <- setdiff(spec[, 1], names(opt))
+if (length(missing_opts) > 0) {
+  stop("Missing required option(s): ", paste(missing_opts, collapse = ", "),
+       call. = FALSE)
+}
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(plyr)
+library(stringr)
+library(dplyr)
+library(highcharter)
+library(DT)
+library(reshape2)
+library(Kmisc)
+library(plotly)
+library(formattable)
+library(htmltools)
+
+
+#------ Helpers ----------------
+# Read a template, replace each placeholder by its value and write the result
+# to `output_name` in the working directory.
+#
+# template_path: path of the Rmd template to read.
+# output_name:   file to write, relative to the working directory.
+# substitutions: named list, placeholder text -> replacement text. The
+#                replacements are applied in list order and inserted
+#                literally (no regular expression or backslash handling).
+fill_template <- function(template_path, output_name, substitutions = list()) {
+  template_lines <- readLines(template_path)
+  for (placeholder in names(substitutions)) {
+    template_lines <- gsub(placeholder, substitutions[[placeholder]],
+                           template_lines, fixed = TRUE)
+  }
+  writeLines(template_lines, con = output_name)
+  invisible(output_name)
+}
+
+
+#----- 1. create the report directory ------------------------
+# Like `mkdir -p`: creates parent directories and does not complain if the
+# directory already exists.
+dir.create(opt$fastqc_site_dir, recursive = TRUE, showWarnings = FALSE)
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+#----- Copy index.Rmd and _site.yml files to job working directory -----
+file.copy(opt$index_rmd, "index.Rmd", overwrite = TRUE)
+file.copy(opt$site_yml, "_site.yml", overwrite = TRUE)
+
+#----- Templates that only carry the code-display placeholder -----
+echo_only <- list(ECHO = opt$echo)
+
+#----- Templates that also need the input files and/or the report directory -----
+fill_template(
+  opt$x01_evaluation_overview, "01_evaluation_overview.Rmd",
+  list(ECHO = opt$echo,
+       READS = opt$reads,
+       REPORT_OUTPUT_DIR = opt$fastqc_site_dir)
+)
+
+fill_template(opt$x1_per_base_quality_scores,
+              "1_per_base_quality_scores.Rmd", echo_only)
+fill_template(opt$x2_per_base_N_content,
+              "2_per_base_N_content.Rmd", echo_only)
+fill_template(opt$x3_per_sequence_quality_scores,
+              "3_per_sequence_quality_scores.Rmd", echo_only)
+fill_template(opt$x4_per_sequence_GC_content,
+              "4_per_sequence_GC_content.Rmd", echo_only)
+fill_template(opt$x5_per_base_sequence_content,
+              "5_per_base_sequence_content.Rmd", echo_only)
+
+fill_template(
+  opt$x02_fastqc_original_reports, "02_fastqc_original_reports.Rmd",
+  list(ECHO = opt$echo,
+       REPORT_OUTPUT_DIR = opt$fastqc_site_dir)
+)
+
+
+#------ 3. render all Rmd files with render_site() --------
+render_site()
+
+
+#-------4. manipulate outputs -----------------------------
+#   a. copy index.html to the report output path
+#   b. copy all files in 'my_site' to the report output directory
+if (!file.copy("my_site/index.html", opt$fastqc_site, overwrite = TRUE)) {
+  stop("Failed to copy my_site/index.html to ", opt$fastqc_site, call. = FALSE)
+}
+# Copy files and directories alike into the report directory, replacing
+# anything already there.
+file.copy(list.files("my_site", full.names = TRUE), opt$fastqc_site_dir,
+          overwrite = TRUE, recursive = TRUE)
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/index.Rmd Tue Aug 08 10:14:46 2017 -0400 @@ -0,0 +1,20 @@ +--- +title: "FastQC Report" +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + + + +## References + +* Andrews, Simon. "FastQC: a quality control tool for high throughput sequence data." (2010): 175-176. +* Goecks, Jeremy, Anton Nekrutenko, and James Taylor. "Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences." Genome biology 11.8 (2010): R86. +* Afgan, Enis, et al. "The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2016 update." Nucleic acids research (2016): gkw343. +* Highcharts. https://www.highcharts.com/. (accessed May 26, 2017). +* R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/. +* Joshua Kunst (2017). highcharter: A Wrapper for the 'Highcharts' Library. R package version 0.5.0. https://CRAN.R-project.org/package=highcharter +* Carson Sievert, Chris Parmer, Toby Hocking, Scott Chamberlain, Karthik Ram, Marianne Corvellec and Pedro Despouy (2017). plotly: Create Interactive Web Graphics via 'plotly.js'. R package version 4.6.0. https://CRAN.R-project.org/package=plotly