Previous changeset 10:600c39b11913 (2017-08-15) Next changeset 12:68ea2ebbf866 (2017-11-09) |
Commit message:
update fastqc site |
modified:
01_evaluation_overview.Rmd _site.yml fastqc_site.xml fastqc_site_render.R index.Rmd |
added:
02_per_base_sequence_quality.Rmd 03_per_tile_sequence_quality.Rmd 04_per_sequence_quality_score.Rmd 05_per_base_sequence_content.Rmd 06_per_sequence_gc_content.Rmd 07_per_base_n_content.Rmd 08_sequence_length_distribution.Rmd 09_sequence_duplication_levels.Rmd 10_adapter_content.Rmd 11_kmer_content.Rmd |
removed:
02_fastqc_original_reports.Rmd 1_per_base_quality_scores.Rmd 2_per_base_N_content.Rmd 3_per_sequence_quality_scores.Rmd 4_per_sequence_GC_content.Rmd 5_per_base_sequence_content.Rmd |
b |
diff -r 600c39b11913 -r 507eec497730 01_evaluation_overview.Rmd --- a/01_evaluation_overview.Rmd Tue Aug 15 15:50:21 2017 -0400 +++ b/01_evaluation_overview.Rmd Tue Nov 07 16:52:24 2017 -0500 |
[ |
b'@@ -1,123 +1,124 @@\n ---\n-title: "Evaluation Overview"\n-output: html_document\n+title: \'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)\'\n+output:\n+ html_document:\n+ number_sections: true\n+ toc: true\n+ theme: cosmo\n+ highlight: tango\n ---\n \n ```{r setup, include=FALSE, warning=FALSE, message=FALSE}\n-knitr::opts_chunk$set(echo = ECHO)\n-```\n-\n-```{bash \'copy data from datasets directory to working directory\', echo=FALSE}\n-# Copy uploaded data to the working directory\n-for f in $(echo READS | sed "s/,/ /g")\n-do\n- cp $f ./\n-done\n-```\n-\n-```{bash \'run fastqc\', echo=FALSE}\n-# run fastqc and place outputs into the report directory\n-for r in $(ls *.dat)\n-do\n- fastqc -o REPORT_OUTPUT_DIR $r > /dev/null 2>&1\n-done\n-```\n-\n-```{bash \'parse fastqc results\', echo=FALSE}\n-##==== copy fastqc generated zip files from report output directory to job work directory ==\n-cp -r REPORT_OUTPUT_DIR/*zip ./\n-\n-# create a file to store data file paths\n-echo "sample_id,file_path" > PWF_file_paths.txt # Pass, Warning, Fail\n-echo "sample_id,file_path" > PBQS_file_paths.txt # Per Base Quality Score\n-echo "sample_id,file_path" > PSQS_file_paths.txt # Per Sequence Quality Score\n-echo "sample_id,file_path" > PSGC_file_paths.txt # Per Sequence GC Content\n-echo "sample_id,file_path" > PBSC_file_paths.txt # Per Base Sequence Content\n-echo "sample_id,file_path" > PBNC_file_paths.txt # Per Base N Content\n-echo "sample_id,file_path" > SDL_file_paths.txt # Sequence Duplication Level\n-echo "sample_id,file_path" > SLD_file_paths.txt # Sequence Length Distribution\n-echo "sample_id,file_path" > KMC_file_paths.txt # Kmer Content\n-\n-for i in $(ls *.zip)\n-do\n- BASE=$(echo $i | sed \'s/\\(.*\\)\\.zip/\\1/g\')\n- echo $BASE\n- unzip ${BASE}.zip > /dev/null 2>&1\n- \n- ##====== pass,warning,fail (WSF) =============\n- awk \'/^>>/ {print}\' "$BASE"/fastqc_data.txt | grep -v \'END_MODULE\' | sed \'s/>>//\' > "$BASE"-PWF.txt\n- echo "${BASE},${BASE}-PWF.txt" >> PWF_file_paths.txt\n-\n- ##====== per base quality scores (PBQS) ======\n- awk \'/^>>Per base sequence quality/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PBQS.txt\n- echo "${BASE},${BASE}-PBQS.txt" >> PBQS_file_paths.txt\n-\n- ##====== per sequence quality scores (PSQS)\n- awk \'/^>>Per sequence quality scores/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PSQS.txt\n- echo "${BASE},${BASE}-PSQS.txt" >> PSQS_file_paths.txt\n-\n- ##====== Per sequence GC content (PSGC)\n- awk \'/^>>Per sequence GC content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PSGC.txt\n- echo "${BASE},${BASE}-PSGC.txt" >> PSGC_file_paths.txt\n- \n- ##====== Per Base Sequence Content (PBSC)\n- awk \'/^>>Per base sequence content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PBSC.txt\n- echo "${BASE},${BASE}-PBSC.txt" >> PBSC_file_paths.txt\n- \n- ##====== Per Base N Content (PBNC)\n- awk \'/^>>Per base N content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-PBNC.txt\n- echo "${BASE},${BASE}-PBNC.txt" >> PBNC_file_paths.txt\n- \n- ##====== Sequence Duplication Level (SDL)\n- awk \'/^>>Sequence Duplication Levels/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-SDL.txt\n- echo "${BASE},${BASE}-SDL.txt" >> SDL_file_paths.txt\n- \n- ##====== Sequence Length Distribution (SLD)\n- awk \'/^>>Sequence Length Distribution/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-SLD.txt\n- echo "${BASE},${BASE}-SLD.txt" >> SLD_file_paths.txt\n- \n- ##====== Kmer Content ============\n- awk \'/^>>Kmer Content/ {flag=1; next} /END_MODULE/ {flag=0} flag\' "$BASE"/fastqc_data.txt >"$BASE"-KMC.txt\n- echo "${BASE},${BASE}-KMC.txt" >> KMC_file_paths.txt\n- \n-done\n+knitr::opts_chunk$set(\n+ echo = ECHO,\n+ error = TRUE\n+)\n ```\n \n \n-## Evaluat'..b' stop("No pre-trimming reads provided!")\n+} else {\n+ ## run fastqc evaluation\n+ fastqc_command = paste0(\'fastqc \') %>% \n+ (function(x) {\n+ ifelse(\'CONTAMINANTS\' != \'None\', paste0(x, \'-c CONTAMINANTS \'), x)\n+ }) %>% \n+ (function(x) {\n+ ifelse(\'LIMITS\' != \'None\', paste0(x, \'-l LIMITS \'), x)\n+ }) %>% \n+ (function(x) {\n+ paste0(x, \'-o REPORT_DIR \')\n+ })\n+ fastqc_command_reads_1 = paste0(fastqc_command, \'READS_1 > /dev/null 2>&1\')\n+ system(fastqc_command_reads_1, intern = TRUE)\n+ \n+ # Original html report\n+ reads_1_base = tail(strsplit(\'READS_1\', \'/\')[[1]], 1)\n+ original_html = tags$a(href=paste0(reads_1_base, \'_fastqc.html\'), paste0(\'HTML report: \', opt$name_1))\n+ \n+ unzip(paste0(\'REPORT_DIR/\', reads_1_base, \'_fastqc.zip\'), exdir = \'REPORT_DIR\')\n+ reads_1_unzip = paste0(\'REPORT_DIR/\', reads_1_base, \'_fastqc/\')\n+ # fastqc_data.txt\n+ file.copy(paste0(reads_1_unzip, \'fastqc_data.txt\'), \'REPORT_DIR/reads_1_fastqc_data.txt\')\n+ fastqc_data = tags$a(href=\'reads_1_fastqc_data.txt\', paste0(\'fastqc_data.txt: \', opt$name_1))\n+ # summary.txt\n+ file.copy(paste0(reads_1_unzip, \'summary.txt\'), \'REPORT_DIR/reads_1_summary.txt\')\n+ summary_data = tags$a(href=\'reads_1_summary.txt\', paste0(\'summary.txt: \', opt$name_1))\n+ \n+ tags$ul(\n+ tags$li(original_html),\n+ tags$li(fastqc_data),\n+ tags$li(summary_data)\n+ )\n }\n ```\n \n \n+## Evaluation of reads after trimming\n+\n ```{r}\n-my_icon = c(\'ok\', \'remove\', \'star\')\n-names(my_icon) = c(\'pass\', \'fail\', \'warn\')\n-evaluate_list = list()\n-for (i in colnames(PWF_df)[-1]) {\n- evaluate_list[[i]] = formatter(\n- "span", \n- style = x ~ style("background-color" = ifelse(x ==\'pass\', \'#9CD027\', ifelse(x == \'fail\', \'#CC0000\', \'#FF4E00\')), \n- "color" = "white",\n- "width" = "50px",\n- "float" = "left",\n- "padding-right" = "5px")\n- )\n+if (\'READS_2\' == \'None\') {\n+ stop("No pre-trimming reads provided!")\n+} else {\n+ ## run fastqc evaluation\n+ fastqc_command = paste0(\'fastqc \') %>% \n+ (function(x) {\n+ ifelse(\'CONTAMINANTS\' != \'None\', paste0(x, \'-c CONTAMINANTS \'), x)\n+ }) %>% \n+ (function(x) {\n+ ifelse(\'LIMITS\' != \'None\', paste0(x, \'-l LIMITS \'), x)\n+ }) %>% \n+ (function(x) {\n+ paste0(x, \'-o REPORT_DIR \')\n+ })\n+ fastqc_command_reads_2 = paste0(fastqc_command, \'READS_2 > /dev/null 2>&1\')\n+ system(fastqc_command_reads_2, intern = TRUE)\n+ \n+ # Original html report\n+ reads_2_base = tail(strsplit(\'READS_2\', \'/\')[[1]], 1)\n+ original_html = tags$a(href=paste0(reads_2_base, \'_fastqc.html\'), paste0(\'HTML report: \', opt$name_2))\n+ \n+ unzip(paste0(\'REPORT_DIR/\', reads_2_base, \'_fastqc.zip\'), exdir = \'REPORT_DIR\')\n+ reads_2_unzip = paste0(\'REPORT_DIR/\', reads_2_base, \'_fastqc/\')\n+ # fastqc_data.txt\n+ file.copy(paste0(reads_2_unzip, \'fastqc_data.txt\'), \'REPORT_DIR/reads_2_fastqc_data.txt\')\n+ fastqc_data = tags$a(href=\'reads_2_fastqc_data.txt\', paste0(\'fastqc_data.txt: \', opt$name_2))\n+ # summary.txt\n+ file.copy(paste0(reads_2_unzip, \'summary.txt\'), \'REPORT_DIR/reads_2_summary.txt\')\n+ summary_data = tags$a(href=\'reads_2_summary.txt\', paste0(\'summary.txt: \', opt$name_2))\n+ \n+ tags$ul(\n+ tags$li(original_html),\n+ tags$li(fastqc_data),\n+ tags$li(summary_data)\n+ )\n }\n+```\n \n-formattable(PWF_df, evaluate_list)\n+\n+\n+# Fastqc output visualization\n+\n+## Overview\n+\n+```{r}\n+reads_1_summary = read.csv(\'REPORT_DIR/reads_1_summary.txt\', header = FALSE, sep = \'\\t\')[, 2:1]\n+reads_2_summary = read.csv(\'REPORT_DIR/reads_2_summary.txt\', header = FALSE, sep = \'\\t\')[, 1]\n+combined_summary = cbind(reads_1_summary, reads_2_summary)\n+names(combined_summary) = c(\'MODULE\', paste0(opt$name_1, \'(before)\'), paste0(opt$name_2, \'(after)\'))\n+combined_summary[combined_summary == \'FAIL\'] = \'FAIL (X)\'\n+combined_summary[combined_summary == \'WARN\'] = \'WARN (!)\'\n+knitr::kable(combined_summary)\n+```\n+\n+# Session Info\n+\n+```{r \'session info\'}\n+sessionInfo()\n ```\n\\ No newline at end of file\n' |
b |
diff -r 600c39b11913 -r 507eec497730 02_fastqc_original_reports.Rmd --- a/02_fastqc_original_reports.Rmd Tue Aug 15 15:50:21 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,20 +0,0 @@ ---- -title: "FastQC original reports" -output: html_document ---- - -```{r 'FastQC original reports', include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set(echo = ECHO) -``` - - -Below are links to ***Fastqc*** original html reports. - -```{r 'html report links'} -html_report_list = list() -html_files = list.files('REPORT_OUTPUT_DIR', pattern = '.*html') -for (i in html_files) { - html_report_list[[i]] = tags$li(tags$a(href=i, i)) -} -tags$ul(html_report_list) -``` \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 02_per_base_sequence_quality.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/02_per_base_sequence_quality.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -0,0 +1,47 @@ +--- +title: 'Per base sequence quality' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error = TRUE +) +``` + +### Per base sequence quality + +```{r 'per base sequence quality', fig.width=10} +## reads 1 +pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality') +pbsq_1$id = 1:length(pbsq_1$X.Base) + +melt_pbsq_1 = filter(melt(pbsq_1, id=c('X.Base', 'id')), variable == 'Mean') +melt_pbsq_1$trim = 'before' + + +## reads 2 +pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality') +pbsq_2$id = 1:length(pbsq_2$X.Base) + +melt_pbsq_2 = filter(melt(pbsq_2, id=c('X.Base', 'id')), variable == 'Mean') +melt_pbsq_2$trim = 'after' + +comb_pbsq = rbind(melt_pbsq_1, melt_pbsq_2) +comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim) + +p = ggplot(data = comb_pbsq) + + geom_line(mapping = aes(x = id, y = value, group = variable, color = variable)) + + scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) + + facet_grid(. ~ trim) + + ylim(0, max(comb_pbsq$value) + 5) + + theme(axis.text.x = element_text(angle=45)) +ggplotly(p) + +``` \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 03_per_tile_sequence_quality.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/03_per_tile_sequence_quality.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -0,0 +1,48 @@ +--- +title: 'Per Tile Sequence Quality' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error = TRUE +) +``` + +### Per tile sequence quality + +```{r 'per tile sequence quality', fig.width=10} +## check if 'per tile sequence quality' module exits or not +check_ptsq = grep('Per tile sequence quality', readLines('REPORT_DIR/reads_1_fastqc_data.txt')) +if (length(check_ptsq) > 0) { + ## reads 1 + ptsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per tile sequence quality') + ptsq_1$trim = 'before' + + ## reads 2 + ptsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per tile sequence quality') + ptsq_2$trim = 'after' + + comb_ptsq = rbind(ptsq_1, ptsq_2) + comb_ptsq$trim = factor(levels = c('before', 'after'), comb_ptsq$trim) + comb_ptsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base) + + # convert integers to charaters + comb_ptsq$Tile = as.character(comb_ptsq$X.Tile) + + p = ggplot(data = comb_ptsq, aes(x = Base, y = Tile, fill = Mean)) + + geom_raster() + + facet_grid(. ~ trim) + + xlab('Position in read (bp)') + + ylab('') + + theme(axis.text.x = element_text(angle=45)) + ggplotly(p) +} else { + print('No "per tile sequence quality" data') +} \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 04_per_sequence_quality_score.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/04_per_sequence_quality_score.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -0,0 +1,39 @@ +--- +title: 'Per sequence quality score' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error = TRUE +) +``` + +### Per sequence quality score + +```{r 'Per sequence quality score', fig.width=10} +## reads 1 +psqs_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per sequence quality scores') +psqs_1$trim = 'before' + +## reads 2 +psqs_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per sequence quality scores') +psqs_2$trim = 'after' + +comb_psqs = rbind(psqs_1, psqs_2) +comb_psqs$trim = factor(levels = c('before', 'after'), comb_psqs$trim) + +p = ggplot(data = comb_psqs, aes(x = X.Quality, y = Count)) + + geom_line(color = 'red') + + facet_grid(. ~ trim) + + xlim(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality)) + + xlab('Mean Sequence Qaulity (Phred Score)') + + ylab('') +ggplotly(p) +``` \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 05_per_base_sequence_content.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/05_per_base_sequence_content.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -0,0 +1,47 @@ +--- +title: 'Per base sequence content' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error = TRUE +) +``` + +### Per base sequence content + +```{r 'Per base sequence content', fig.width=10} +## reads 1 +pbsc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence content') +pbsc_1$id = 1:length(pbsc_1$X.Base) + +melt_pbsc_1 = melt(pbsc_1, id=c('X.Base', 'id')) +melt_pbsc_1$trim = 'before' + + +## reads 2 +pbsc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence content') +pbsc_2$id = 1:length(pbsc_2$X.Base) + +melt_pbsc_2 = melt(pbsc_2, id=c('X.Base', 'id')) +melt_pbsc_2$trim = 'after' + +comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2) +comb_pbsc$trim = factor(levels = c('before', 'after'), comb_pbsc$trim) + +p = ggplot(data = comb_pbsc, aes(x = id, y = value, color = variable)) + + geom_line() + + facet_grid(. ~ trim) + + xlim(min(comb_pbsc$id), max(comb_pbsc$id)) + + ylim(0, 100) + + xlab('Position in read (bp)') + + ylab('') +ggplotly(p) +``` \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 06_per_sequence_gc_content.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/06_per_sequence_gc_content.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -0,0 +1,38 @@ +--- +title: 'Per sequence GC content' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error = TRUE +) +``` + +### Per sequence GC content + +```{r 'Per sequence GC content', fig.width=10} +## reads 1 +psGCc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per sequence GC content') +psGCc_1$trim = 'before' + +## reads 2 +psGCc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per sequence GC content') +psGCc_2$trim = 'after' + +comb_psGCc = rbind(psGCc_1, psGCc_2) +comb_psGCc$trim = factor(levels = c('before', 'after'), comb_psGCc$trim) + +p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) + + geom_line(color = 'red') + + facet_grid(. ~ trim) + + xlab('Mean Sequence Qaulity (Phred Score)') + + ylab('') +ggplotly(p) +``` \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 07_per_base_n_content.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/07_per_base_n_content.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -0,0 +1,43 @@ +--- +title: 'Per base N content' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error = TRUE +) +``` + +### Per base N content + +```{r 'Per base N content', fig.width=10} +## reads 1 +pbNc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base N content') +pbNc_1$id = 1:length(pbNc_1$X.Base) +pbNc_1$trim = 'before' + +## reads 2 +pbNc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base N content') +pbNc_2$id = 1:length(pbNc_2$X.Base) +pbNc_2$trim = 'after' + +comb_pbNc = rbind(pbNc_1, pbNc_2) +comb_pbNc$trim = factor(levels = c('before', 'after'), comb_pbNc$trim) + +p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) + + geom_line(color = 'red') + + scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) + + facet_grid(. ~ trim) + + ylim(0, 1) + + xlab('N-Count') + + ylab('') + + theme(axis.text.x = element_text(angle=45)) +ggplotly(p) +``` \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 08_sequence_length_distribution.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/08_sequence_length_distribution.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -0,0 +1,42 @@ +--- +title: 'Sequence Length Distribution' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error = TRUE +) +``` + +### Sequence Length Distribution + +```{r 'Sequence Length Distribution', fig.width=10} +## reads 1 +sld_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Sequence Length Distribution') +sld_1$id = 1:length(sld_1$X.Length) +sld_1$trim = 'before' + +## reads 2 +sld_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Sequence Length Distribution') +sld_2$id = 1:length(sld_2$X.Length) +sld_2$trim = 'after' + +comb_sld = rbind(sld_1, sld_2) +comb_sld$trim = factor(levels = c('before', 'after'), comb_sld$trim) + +p = ggplot(data = comb_sld, aes(x = id, y = Count)) + + geom_line(color = 'red') + + scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) + + facet_grid(. ~ trim) + + xlab('Sequence Length (bp)') + + ylab('') + + theme(axis.text.x = element_text(angle=45)) +ggplotly(p) +``` \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 09_sequence_duplication_levels.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/09_sequence_duplication_levels.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -0,0 +1,49 @@ +--- +title: 'Sequence Duplication Levels' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error = TRUE +) +``` + +### Sequence Duplication Levels + +```{r 'Sequence Duplication Levels', fig.width=10} +## reads 1 +sdl_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Sequence Duplication Levels', header = FALSE, comment.char = '#') +names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total') +sdl_1$id = 1:length(sdl_1$Duplication_Level) + +melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id')) +melt_sdl_1$trim = 'before' + + +## reads 2 +sdl_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Sequence Duplication Levels', header = FALSE, comment.char = '#') +names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total') +sdl_2$id = 1:length(sdl_2$Duplication_Level) + +melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id')) +melt_sdl_2$trim = 'after' + +comb_sdl = rbind(melt_sdl_1, melt_sdl_2) +comb_sdl$trim = factor(levels = c('before', 'after'), comb_sdl$trim) + +p = ggplot(data = comb_sdl, aes(x = id, y = value, color = variable)) + + geom_line() + + scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) + + facet_grid(. ~ trim) + + xlab('Sequence Duplication Level') + + ylab('') + + theme(axis.text.x = element_text(angle=45)) +ggplotly(p) +``` \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 10_adapter_content.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/10_adapter_content.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -0,0 +1,46 @@ +--- +title: 'Adapter Content' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error = TRUE +) +``` + +### Adapter Content + +```{r 'Adapter Content', fig.width=10} +## reads 1 +ac_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Adapter Content') +ac_1$id = 1:length(ac_1$X.Position) + +melt_ac_1 = melt(ac_1, id=c('X.Position', 'id')) +melt_ac_1$trim = 'before' + +## reads 2 +ac_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Adapter Content') +ac_2$id = 1:length(ac_2$X.Position) + +melt_ac_2 = melt(ac_2, id=c('X.Position', 'id')) +melt_ac_2$trim = 'after' + +comb_ac = rbind(melt_ac_1, melt_ac_2) +comb_ac$trim = factor(levels = c('before', 'after'), comb_ac$trim) + +p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) + + geom_line() + + facet_grid(. ~ trim) + + xlim(min(comb_ac$id), max(comb_ac$id)) + + ylim(0, 1) + + xlab('Position in read (bp)') + + ylab('') +ggplotly(p) +``` \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 11_kmer_content.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/11_kmer_content.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -0,0 +1,31 @@ +--- +title: 'Kmer Content' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error = TRUE +) +``` + +### Kmer Content {.tabset} + +#### Before + +```{r 'Kmer Content (before)', fig.width=10} +kc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Kmer Content') +knitr::kable(kc_1) +``` + +#### After +```{r 'Kmer Content (after)', fig.width=10} +kc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Kmer Content') +knitr::kable(kc_2) +``` \ No newline at end of file |
b |
diff -r 600c39b11913 -r 507eec497730 1_per_base_quality_scores.Rmd --- a/1_per_base_quality_scores.Rmd Tue Aug 15 15:50:21 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,62 +0,0 @@ ---- -title: "Per Base Quality Scores" -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set(echo = ECHO) -``` - - -## Per Base Quality Scores - -```{r} -PBQS_df = data.frame() -PBQS_file_paths = read.csv('PBQS_file_paths.txt', - header = TRUE, stringsAsFactors = FALSE) -for(i in 1:nrow(PBQS_file_paths)) { - # file_path = paste0('REPORT_OUTPUT_DIR/', PBQS_file_paths[i,2]) - file_path = PBQS_file_paths[i,2] - pbqs_df = read.csv(file_path, - sep='\t', header=TRUE, stringsAsFactors = FALSE) %>% - mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]), - Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>% - (function (df) { - df1 = select(df, -Base2) - df2 = select(df, -Base1) %>% filter(Base2 != '') - colnames(df1) = c(colnames(df1)[1:7], 'Base') - colnames(df2) = c(colnames(df2)[1:7], 'Base') - res = rbind(df1, df2) %>% arrange(Base) - return(res) - }) - pbqs_df$sample_id = rep(PBQS_file_paths[i,1], nrow(pbqs_df)) - PBQS_df = rbind(PBQS_df, pbqs_df) -} -``` - - -```{r} -# datatable(PBQS_df) -max_phred = max(PBQS_df$Mean) + 10 -hchart(PBQS_df, "line", hcaes(x = Base, y = Mean, group = sample_id)) %>% - hc_title( - text = "Per Base Quality Score" - ) %>% - hc_yAxis( - title = list(text = "Mean Base Quality Score"), - min = 0, - max = max_phred, - plotLines = list( - list(label = list(text = "Phred Score = 27"), - width = 2, - dashStyle = "dash", - color = "green", - value = 27), - list(label = list(text = "Phred Score = 20"), - width = 2, - color = "red", - value = 20) - ) - ) %>% - hc_exporting(enabled = TRUE) -``` |
b |
diff -r 600c39b11913 -r 507eec497730 2_per_base_N_content.Rmd --- a/2_per_base_N_content.Rmd Tue Aug 15 15:50:21 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,58 +0,0 @@ ---- -title: "Per Base N Content" -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set(echo = ECHO) -``` - -## Per Base N Content - -```{r} -PBNC_df = data.frame() -PBNC_file_paths = read.csv('PBNC_file_paths.txt', - header = TRUE, stringsAsFactors = FALSE) -for(i in 1:nrow(PBNC_file_paths)) { - # file_path = paste0('REPORT_OUTPUT_DIR/', PBNC_file_paths[i,2]) - file_path = PBNC_file_paths[i,2] - pbnc_df = read.csv(file_path, - sep='\t', header=TRUE, stringsAsFactors = FALSE) %>% - mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]), - Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>% - (function (df) { - df1 = select(df, -Base2) - df2 = select(df, -Base1) %>% filter(Base2 != '') - colnames(df1) = c(colnames(df1)[1:2], 'Base') - colnames(df2) = c(colnames(df2)[1:2], 'Base') - res = rbind(df1, df2) %>% arrange(Base) - return(res) - }) - pbnc_df$sample_id = rep(PBNC_file_paths[i,1], nrow(pbnc_df)) - PBNC_df = rbind(PBNC_df, pbnc_df) -} -``` - - -```{r} -PBNC_df$N.Count = PBNC_df$N.Count * 100 -max_phred = max(PBNC_df$N.Count) + 5 -hchart(PBNC_df, "line", hcaes(x = as.character(Base), y = N.Count, group = sample_id)) %>% - hc_title( - text = "Per Base N Content" - ) %>% - hc_xAxis( - title = list(text = "Base Position") - ) %>% - hc_yAxis( - title = list(text = "N %"), - plotLines = list( - list(label = list(text = "N = 5%"), - width = 2, - dashStyle = "dash", - color = "red", - value = 5) - ) - ) %>% - hc_exporting(enabled = TRUE) -``` |
b |
diff -r 600c39b11913 -r 507eec497730 3_per_sequence_quality_scores.Rmd --- a/3_per_sequence_quality_scores.Rmd Tue Aug 15 15:50:21 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,50 +0,0 @@ ---- -title: "Per Sequence Quality Scores" -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set(echo = ECHO) -``` - -## Per Sequence Quality Scores - -```{r} -PSQS_df = data.frame() -PSQS_file_paths = read.csv('PSQS_file_paths.txt', - header = TRUE, stringsAsFactors = FALSE) -for(i in 1:nrow(PSQS_file_paths)) { - # file_path = paste0('REPORT_OUTPUT_DIR/', PSQS_file_paths[i,2]) - file_path = PSQS_file_paths[i,2] - psqs_df = read.csv(file_path, - sep='\t', header=TRUE, stringsAsFactors = FALSE) - psqs_df$sample_id = rep(PSQS_file_paths[i,1], nrow(psqs_df)) - PSQS_df = rbind(PSQS_df, psqs_df) -} -``` - - -```{r} -max_phred = max(PSQS_df$X.Quality) + 5 -hchart(PSQS_df, "line", hcaes(x = X.Quality, y = Count, group = sample_id)) %>% - hc_title( - text = "Per Sequence Quality Score" - ) %>% - hc_xAxis( - title = list(text = "Mean Sequence Quality Score"), - min = 0, - max = max_phred, - plotLines = list( - list(label = list(text = "Phred Score = 27"), - width = 2, - dashStyle = "dash", - color = "green", - value = 27), - list(label = list(text = "Phred Score = 20"), - width = 2, - color = "red", - value = 20) - ) - ) %>% - hc_exporting(enabled = TRUE) -``` |
b |
diff -r 600c39b11913 -r 507eec497730 4_per_sequence_GC_content.Rmd --- a/4_per_sequence_GC_content.Rmd Tue Aug 15 15:50:21 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,38 +0,0 @@ ---- -title: "Per Sequence GC Content" -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set(echo = ECHO) -``` - -## Per Sequence GC Content - - -```{r} -PSGC_df = data.frame() -PSGC_file_paths = read.csv('PSGC_file_paths.txt', - header = TRUE, stringsAsFactors = FALSE) -for(i in 1:nrow(PSGC_file_paths)) { - # file_path = paste0('REPORT_OUTPUT_DIR/', PSGC_file_paths[i,2]) - file_path = PSGC_file_paths[i,2] - psgc_df = read.csv(file_path, - sep='\t', header=TRUE, stringsAsFactors = FALSE) - psgc_df$sample_id = rep(PSGC_file_paths[i,1], nrow(psgc_df)) - PSGC_df = rbind(PSGC_df, psgc_df) -} -``` - - -```{r} -max_phred = max(PSGC_df$Count) + 5 -hchart(PSGC_df, "line", hcaes(x = X.GC.Content, y = Count, group = sample_id)) %>% - hc_title( - text = "Per Sequence GC Content" - ) %>% - hc_xAxis( - title = list(text = "% GC") - ) %>% - hc_exporting(enabled = TRUE) -``` |
b |
diff -r 600c39b11913 -r 507eec497730 5_per_base_sequence_content.Rmd --- a/5_per_base_sequence_content.Rmd Tue Aug 15 15:50:21 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,45 +0,0 @@ ---- -title: "Per Base Sequence Content" -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set(echo = ECHO) -``` - -## Per Base Sequence Content - -```{r} -PBSC_df = data.frame() -PBSC_file_paths = read.csv('PBSC_file_paths.txt', - header = TRUE, stringsAsFactors = FALSE) -for(i in 1:nrow(PBSC_file_paths)) { - # file_path = paste0('REPORT_OUTPUT_DIR/', PBSC_file_paths[i,2]) - file_path = PBSC_file_paths[i,2] - pbsc_df = read.csv(file_path, - sep='\t', header=TRUE, stringsAsFactors = FALSE) %>% - mutate(Base1=as.numeric(str_split_fixed(X.Base, '-', 2)[,1]), - Base2=as.numeric(str_split_fixed(X.Base, '-', 2)[,2])) %>% - (function (df) { - df1 = select(df, -Base2) - df2 = select(df, -Base1) %>% filter(Base2 != '') - colnames(df1) = c(colnames(df1)[1:5], 'Base') - colnames(df2) = c(colnames(df2)[1:5], 'Base') - res = rbind(df1, df2) %>% arrange(Base) - return(res) - }) - pbsc_df$sample_id = rep(PBSC_file_paths[i,1], nrow(pbsc_df)) - PBSC_df = rbind(PBSC_df, pbsc_df) -} -``` - - -```{r out.width="100%"} -PBSC_df_2 = select(PBSC_df, -X.Base) %>% - melt(id = c('Base', 'sample_id'), value.name = 'base_percentage') -p = ggplot(data = PBSC_df_2, aes(x = Base, y = base_percentage, group = variable, color = variable)) + - geom_line() + - facet_wrap(~ sample_id) -ggplotly(p) -``` - |
b |
diff -r 600c39b11913 -r 507eec497730 _site.yml --- a/_site.yml Tue Aug 15 15:50:21 2017 -0400 +++ b/_site.yml Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -8,21 +8,29 @@ icon: fa-home href: index.html - text: "Evaluation Overview" - href: 01_evaluation_overview.html - - text: "Evaluation Items" + href: x01_evaluation_overview.html + - text: "Evaluation by data module" menu: - - text: "Per Base Quality Scores" - href: 1_per_base_quality_scores.html + - text: "Per Base Sequence Quality" + href: x02_per_base_sequence_quality.html + - text: "Per Tile Sequence Quality" + href: x03_per_tile_sequence_quality.html + - text: "Per Sequence Quality Score" + href: x04_per_sequence_quality_score.html + - text: "Per Base Sequence Content" + href: x05_per_base_sequence_content.html + - text: "Per Sequence GC Content" + href: x06_per_sequence_gc_content.html - text: "Per Base N Content" - href: 2_per_base_N_content.html - - text: "Per Sequence Quality Scores" - href: 3_per_sequence_quality_scores.html - - text: "Per Sequence GC Content" - href: 4_per_sequence_GC_content.html - - text: "Per Base Sequence Content" - href: 5_per_base_sequence_content.html - - text: "Original FastQC Reports" - href: 02_fastqc_original_reports.html + href: x07_per_base_n_content.html + - text: "Sequence Length Distribution" + href: x08_sequence_length_distribution.html + - text: "Sequence Duplication Levels" + href: x09_sequence_duplication_levels.html + - text: "Adapter Content" + href: x10_adapter_content.html + - text: "Kmer Content" + href: x11_kmer_content.html output: html_document: theme: cosmo |
b |
diff -r 600c39b11913 -r 507eec497730 fastqc_site.xml --- a/fastqc_site.xml Tue Aug 15 15:50:21 2017 -0400 +++ b/fastqc_site.xml Tue Nov 07 16:52:24 2017 -0500 |
[ |
b'@@ -1,9 +1,9 @@\n-<tool id="fastqc_site" name="Fastqc Site" version="1.0.0">\n+<tool id="fastqc_site" name="Fastqc Site" version="2.0.0">\n <requirements>\n <requirement type="package" version="1.15.0.6-0">pandoc</requirement>\n <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>\n <requirement type="package" version="1.20.0">r-getopt</requirement>\n- <requirement type="package" version="1.2">r-rmarkdown</requirement>\n+ <requirement type="package" version="1.3">r-rmarkdown</requirement>\n <requirement type="package" version="1.8.4">r-plyr</requirement>\n <requirement type="package" version="1.1.0">r-stringr</requirement>\n <requirement type="package" version="0.5.0">r-highcharter</requirement>\n@@ -14,22 +14,12 @@\n <requirement type="package" version="0.3.5">r-htmltools</requirement>\n <requirement type="package" version="0.11.5">fastqc</requirement>\n </requirements>\n- <description>\n- Implements FastQC analysis and display results in R Markdown website.\n- </description>\n <stdio>\n- <regex match="Execution halted"\n- source="both"\n- level="fatal"\n- description="Execution halted." />\n- <regex match="Error in"\n- source="both"\n- level="fatal"\n- description="An undefined error occured, please check your intput carefully and contact your administrator." />\n- <regex match="Fatal error"\n- source="both"\n- level="fatal"\n- description="An undefined error occured, please check your intput carefully and contact your administrator." />\n+ <!--redirecting stderr to a file. "XXX" is used to match with nothing so that tool running won\'t be interrupted during testing-->\n+ <regex match="XXX"\n+ source="stderr"\n+ level="warning"\n+ description="Check the warnings_and_errors.txt file for more details."/>\n </stdio>\n <command>\n <![CDATA[\n@@ -37,36 +27,65 @@\n Rscript \'${__tool_directory__}/fastqc_site_render.R\'\n \n ## 1. input data\n- -r $reads\n -e $echo\n+ -r $reads_1\n+ -n \'$reads_1.name\'\n+ -R $reads_2\n+ -N \'$reads_2.name\'\n+ -c $contaminants\n+ -l $limits\n \n ## 2. output report and report site directory\n-\t\t -o $fastqc_site\n-\t\t -d $fastqc_site.files_path\n+\t\t -o $report\n+\t\t -d $report.files_path\n+\t\t -s $sink_message\n \n \t\t ## 3. Rmd templates sitting in the tool directory\n \n-\t\t ## _site.yml and index.Rmd template files\n- -s \'${__tool_directory__}/_site.yml\'\n- -i \'${__tool_directory__}/index.Rmd\'\n+\t\t ## _site.yml and index.Rmd template files\n+ -S \'${__tool_directory__}/_site.yml\'\n+ -I \'${__tool_directory__}/index.Rmd\'\n \n- ## other Rmd body template files\n-\t\t -p \'${__tool_directory__}/01_evaluation_overview.Rmd\'\n-\t\t -a \'${__tool_directory__}/02_fastqc_original_reports.Rmd\'\n-\t\t -b \'${__tool_directory__}/1_per_base_quality_scores.Rmd\'\n-\t\t -c \'${__tool_directory__}/2_per_base_N_content.Rmd\'\n-\t\t -f \'${__tool_directory__}/3_per_sequence_quality_scores.Rmd\'\n-\t\t -g \'${__tool_directory__}/4_per_sequence_GC_content.Rmd\'\n-\t\t -h \'${__tool_directory__}/5_per_base_sequence_content.Rmd\'\n+ ## other Rmd body template files\n+\t\t -A \'${__tool_directory__}/01_evaluation_overview.Rmd\'\n+\t\t -B \'${__tool_directory__}/02_per_base_sequence_quality.Rmd\'\n+\t\t -C \'${__tool_directory__}/03_per_tile_sequence_quality.Rmd\'\n+\t\t -D \'${__tool_directory__}/04_per_sequence_quality_score.Rmd\'\n+\t\t -E \'${__tool_directory__}/05_per_base_sequence_content.Rmd\'\n+\t\t -F \'${__tool_directory__}/06_per_sequence_gc_content.Rmd\'\n+\t\t -G \'${__tool_directory__}/07_per_base_n_content.Rmd\'\n'..b't="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_2" type="data"\n+ label="Short reads after trimming"\n+ help="Short reads data from history. This could be reads after trimming."/>\n+ <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"\n+ help="Specifies a non-default file which contains the list of adapter sequences which will be explicitly\n+ searched against the library. The file must contain sets of named adapters\n+ in the form name[tab]sequence. Lines prefixed with a hash will be ignored."/>\n+ <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file"\n+ help="Specifies a non-default file which contains a set of criteria\n+ which will be used to determine the warn/error limits for the\n+ various modules. This file can also be used to selectively\n+ remove some modules from the output all together. The format\n+ needs to mirror the default limits.txt file found in the\n+ Configuration folder."/>\n+ <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"\n+ label="Display analysis code in report?"/>\n </inputs>\n <outputs>\n- <data format="html" name="fastqc_site" label="fastqc site" />\n+ <data format="html" name="report" label="fastqc site"/>\n+ <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>\n </outputs>\n <citations>\n <citation type="bibtex">\n@@ -79,7 +98,8 @@\n <citation type="bibtex">\n @article{allaire2016rmarkdown,\n title={rmarkdown: Dynamic Documents for R, 2016},\n- author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},\n+ author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff\n+ and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},\n journal={R package version 0.9},\n volume={6},\n year={2016}\n@@ -97,31 +117,14 @@\n <citation type="bibtex">\n @misc{plotly2017,\n title = {plotly: Create Interactive Web Graphics via \'plotly.js\'},\n- author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy},\n+ author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and\n+ Marianne Corvellec and Pedro Despouy},\n year = {2017},\n note = {R package version 4.6.0},\n url = {https://CRAN.R-project.org/package=plotly},\n }\n </citation>\n <citation type="bibtex">\n- @misc{highcharter2017,\n- title = {highcharter: A Wrapper for the \'Highcharts\' Library},\n- author = {Joshua Kunst},\n- year = {2017},\n- note = {R package version 0.5.0},\n- url = {https://CRAN.R-project.org/package=highcharter},\n- }\n- </citation>\n- <citation type="bibtex">\n- @misc{formattable2016,\n- title = {formattable: Create \'Formattable\' Data Structures},\n- author = {Kun Ren and Kenton Russell},\n- year = {2016},\n- note = {R package version 0.2.0.1},\n- url = {https://CRAN.R-project.org/package=formattable},\n- }\n- </citation>\n- <citation>\n @article{ewels2016multiqc,\n title={MultiQC: summarize analysis results for multiple tools and samples in a single report},\n author={Ewels, Philip and Magnusson, M{\\aa}ns and Lundin, Sverker and K{\\"a}ller, Max},\n' |
b |
diff -r 600c39b11913 -r 507eec497730 fastqc_site_render.R --- a/fastqc_site_render.R Tue Aug 15 15:50:21 2017 -0400 +++ b/fastqc_site_render.R Tue Nov 07 16:52:24 2017 -0500 |
[ |
b'@@ -1,195 +1,283 @@\n-##======= Handle arguments from command line ========\n-# setup R error handline to go to stderr\n-options(show.error.messages=FALSE,\n- error=function(){\n- cat(geterrmessage(), file=stderr())\n- quit("no", 1, F)\n- })\n-\n-# we need that to not crash galaxy with an UTF8 error on German LC settings.\n-loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n-\n-# suppress warning\n-options(warn = -1)\n-\n-options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)\n-args = commandArgs(trailingOnly=TRUE)\n-\n-suppressPackageStartupMessages({\n- library(getopt)\n- library(tools)\n-})\n-\n-# column 1: the long flag name\n-# column 2: the short flag alias. A SINGLE character string\n-# column 3: argument mask\n-# 0: no argument\n-# 1: argument required\n-# 2: argument is optional\n-# column 4: date type to which the flag\'s argument shall be cast.\n-# possible values: logical, integer, double, complex, character.\n-spec_list=list()\n-\n-##------- 1. input data ---------------------\n-spec_list$READS = c(\'reads\', \'r\', \'1\', \'character\')\n-spec_list$ECHO = c(\'echo\', \'e\', \'1\', \'character\')\n-\n-##--------2. output report and report site directory --------------\n-spec_list$FASTQC_SITE = c(\'fastqc_site\', \'o\', \'1\', \'character\')\n-spec_list$FASTQC_SITE_DIR = c(\'fastqc_site_dir\', \'d\', \'1\', \'character\')\n-\n-##--------3. Rmd templates sitting in the tool directory ----------\n-\n- ## _site.yml and index.Rmd files\n- spec_list$SITE_YML = c(\'site_yml\', \'s\', 1, \'character\')\n- spec_list$INDEX_Rmd = c(\'index_rmd\', \'i\', 1, \'character\')\n- \n- ## other Rmd body template files\n- spec_list$x01 = c(\'x01_evaluation_overview\', \'p\', \'1\', \'character\')\n- spec_list$x02 = c(\'x02_fastqc_original_reports\', \'a\', \'1\', \'character\')\n- spec_list$x1 = c(\'x1_per_base_quality_scores\', \'b\', \'1\', \'character\')\n- spec_list$x2 = c(\'x2_per_base_N_content\', \'c\', \'1\', \'character\')\n- spec_list$x3 = c(\'x3_per_sequence_quality_scores\', \'f\', \'1\', \'character\')\n- spec_list$x4 = c(\'x4_per_sequence_GC_content\', \'g\', \'1\', \'character\')\n- spec_list$x5 = c(\'x5_per_base_sequence_content\', \'h\', \'1\', \'character\')\n-\n-##------------------------------------------------------------------\n-\n-spec = t(as.data.frame(spec_list))\n-opt = getopt(spec)\n-# arguments are accessed by long flag name (the first column in the spec matrix)\n-# NOT by element name in the spec_list\n-# example: opt$help, opt$expression_file\n-##====== End of arguments handling ==========\n-\n-#------ Load libraries ---------\n+library(getopt)\n library(rmarkdown)\n+library(htmltools)\n library(plyr)\n+library(dplyr)\n library(stringr)\n-library(dplyr)\n library(highcharter)\n library(DT)\n library(reshape2)\n library(plotly)\n library(formattable)\n-library(htmltools)\n-\n+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)\n \n-#----- 1. create the report directory ------------------------\n-paste0(\'mkdir -p \', opt$fastqc_site_dir) %>%\n- system()\n-\n-#----- 2. generate Rmd files with Rmd templates --------------\n-# a. templates without placeholder variables:\n-# copy templates from tool directory to the working directory.\n-# b. templates with placeholder variables:\n-# substitute variables with user input values and place them in the working directory.\n+##============ Sink warnings and errors to a file ==============\n+## use the sink() function to wrap all code within it.\n+##==============================================================\n+zz = file(\'warnings_and_errors.txt\')\n+sink(zz)\n+sink(zz, type = \'message\')\n+ ##---------below is the code for rendering .Rmd templates-----\n+ \n+ ##=============STEP 1: handle command line arguments==========\n+ ##\n+ ##============================================================\n+ # column 1: the long flag name\n+ # column 2: the short flag alias. A SINGLE character string\n+ # column 3: argument mask\n+ # 0: no argument\n+ # 1: argument required\n+ # 2: argument i'..b' {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x06_per_sequence_gc_content.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n+ \n+ # 07_per_base_n_content.Rmd\n+ readLines(opt$x07_per_base_n_content) %>%\n+ (function(x) {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x07_per_base_n_content.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n \n- #----- 02_fastqc_original_reports.Rmd -------------------\n- readLines(opt$x02_fastqc_original_reports) %>%\n- (function(x) {\n- gsub(\'ECHO\', opt$echo, x)\n- }) %>%\n- (function(x) {\n- gsub(\'REPORT_OUTPUT_DIR\', opt$fastqc_site_dir, x)\n- }) %>%\n- (function(x) {\n- fileConn = file(\'02_fastqc_original_reports.Rmd\')\n- writeLines(x, con=fileConn)\n- close(fileConn)\n- })\n+ # 08_sequence_length_distribution.Rmd\n+ readLines(opt$x08_sequence_length_distribution) %>%\n+ (function(x) {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x08_sequence_length_distribution.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n+ \n+ # 09_sequence_duplication_levels.Rmd\n+ readLines(opt$x09_sequence_duplication_levels) %>%\n+ (function(x) {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x09_sequence_duplication_levels.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n+ \n+ # 10_adapter_content.Rmd\n+ readLines(opt$x10_adapter_content) %>%\n+ (function(x) {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x10_adapter_content.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n+ \n+ # 11_kmer_content.Rmd\n+ readLines(opt$x11_kmer_content) %>%\n+ (function(x) {\n+ gsub(\'ECHO\', opt$echo, x)\n+ }) %>%\n+ (function(x) {\n+ gsub(\'REPORT_DIR\', opt$report_dir, x)\n+ }) %>%\n+ (function(x) {\n+ fileConn = file(\'x11_kmer_content.Rmd\')\n+ writeLines(x, con=fileConn)\n+ close(fileConn)\n+ })\n+ \n+ ##=============STEP 5: render all .Rmd templates=================\n+ ##\n+ ##===========================================================\n+ extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {\n+ f = readLines(fastqc_data)\n+ start_line = grep(module_name, f)\n+ end_module_lines = grep(\'END_MODULE\', f)\n+ end_line = end_module_lines[which(end_module_lines > start_line)[1]]\n+ module_data = f[(start_line+1):(end_line-1)]\n+ writeLines(module_data, \'temp.txt\')\n+ read.csv(\'temp.txt\', sep = \'\\t\', header = header, comment.char = comment.char)\n+ }\n+ render_site()\n+ \n+ ##=============STEP 6: manipulate outputs====================\n+ ##\n+ ##===========================================================\n+ file.copy(\'my_site/index.html\', opt$report_html, recursive = TRUE)\n+ system(paste0(\'cp -r my_site/* \', opt$report_dir))\n \n \n-\n-#------ 3. render all Rmd files with render_site() --------\n-render_site() \n-\n-\n-#-------4. manipulate outputs -----------------------------\n-# a. copy index.html to the report output path\n-# b. copy all files in \'my_site\' to the report output directory\n-file.copy(\'my_site/index.html\', opt$fastqc_site, recursive=TRUE)\n-paste0(\'cp -r my_site/* \', opt$fastqc_site_dir) %>%\n- system()\n-\n-\n+ ##--------end of code rendering .Rmd templates----------------\n+sink()\n+##=========== End of sinking output=============================\n\\ No newline at end of file\n' |
b |
diff -r 600c39b11913 -r 507eec497730 index.Rmd --- a/index.Rmd Tue Aug 15 15:50:21 2017 -0400 +++ b/index.Rmd Tue Nov 07 16:52:24 2017 -0500 |
b |
@@ -4,7 +4,7 @@ --- ```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set(echo = TRUE) +knitr::opts_chunk$set(echo = TRUE, error = TRUE) ``` |