Mercurial > repos > mingchen0919 > rmarkdown_fastqc_site
view fastqc_site_render.R @ 12:68ea2ebbf866 draft
add boxplot for per base sequence quality
author | mingchen0919 |
---|---|
date | Thu, 09 Nov 2017 09:23:43 -0500 |
parents | 507eec497730 |
children | a6f8382f852c |
line wrap: on
line source
library(getopt)
library(rmarkdown)
library(htmltools)
library(plyr)
library(dplyr)
library(stringr)
library(highcharter)
library(DT)
library(reshape2)
library(plotly)
library(formattable)

options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)

## ============ Sink warnings and errors to a file ==============
## All code below runs between the sink() calls here and the matching
## calls at the bottom of the script.
## ==============================================================
# The connection is opened explicitly: the message stream can only be
# diverted to an already-open connection, and owning the connection lets
# the script close it itself at the end.
zz <- file("warnings_and_errors.txt", open = "wt")
sink(zz)
sink(zz, type = "message")

## --------- below is the code for rendering .Rmd templates -----

## ============= STEP 1: handle command line arguments ==========
# Each entry is one row of the getopt specification:
#   column 1: the long flag name
#   column 2: the short flag alias (a SINGLE character string)
#   column 3: argument mask
#               0: no argument
#               1: argument required
#               2: argument is optional
#   column 4: data type to which the flag's argument shall be cast.
#             possible values: logical, integer, double, complex, character.
#
# Best practice:
#   1. The short flag alias should match the flag in the command section of
#      the XML file.
#   2. The long flag name can be any legal R variable name.
#   3. List names are also used as placeholders in the .Rmd templates. Two
#      names may share a common string, but one name must not be contained
#      in another (e.g. "ECHO" and "ECHO_XXX"), otherwise the substitution
#      in STEP 4 corrupts the longer placeholder.
args_list <- list(
  ## ------- 1. input data ---------------------
  ECHO         = c("echo", "e", "1", "character"),
  READS_1      = c("reads_1", "r", "1", "character"),
  NAME_1       = c("name_1", "n", "1", "character"),
  READS_2      = c("reads_2", "R", "1", "character"),
  NAME_2       = c("name_2", "N", "1", "character"),
  CONTAMINANTS = c("contaminants", "c", "1", "character"),
  LIMITS       = c("limits", "l", "1", "character"),
  ## -------- 2. output report and outputs --------------
  REPORT_HTML  = c("report_html", "o", "1", "character"),
  REPORT_DIR   = c("report_dir", "d", "1", "character"),
  # NOTE(review): sink_message is parsed but never read in this script.
  SINK_MESSAGE = c("sink_message", "s", "1", "character"),
  ## -------- 3. .Rmd templates in the tool directory ----------
  SITE_YML     = c("site_yml", "S", "1", "character"),
  INDEX_RMD    = c("index_rmd", "I", "1", "character"),
  X01_EVALUATION_OVERVIEW =
    c("x01_evaluation_overview", "A", "1", "character"),
  X02_PER_BASE_SEQUENCE_QUALITY =
    c("x02_per_base_sequence_quality", "B", "1", "character"),
  X03_PER_TILE_SEQUENCE_QUALITY =
    c("x03_per_tile_sequence_quality", "C", "1", "character"),
  X04_PER_SEQUENCE_QUALITY_SCORE =
    c("x04_per_sequence_quality_score", "D", "1", "character"),
  X05_PER_BASE_SEQUENCE_CONTENT =
    c("x05_per_base_sequence_content", "E", "1", "character"),
  X06_PER_SEQUENCE_GC_CONTENT =
    c("x06_per_sequence_gc_content", "F", "1", "character"),
  X07_PER_BASE_N_CONTENT =
    c("x07_per_base_n_content", "G", "1", "character"),
  X08_SEQUENCE_LENGTH_DISTRIBUTION =
    c("x08_sequence_length_distribution", "H", "1", "character"),
  X09_SEQUENCE_DUPLICATION_LEVELS =
    c("x09_sequence_duplication_levels", "J", "1", "character"),
  X10_ADAPTER_CONTENT =
    c("x10_adapter_content", "K", "1", "character"),
  X11_KMER_CONTENT =
    c("x11_kmer_content", "L", "1", "character")
)

# Transposing the data frame yields the one-row-per-flag matrix getopt expects.
opt <- getopt(t(as.data.frame(args_list)))

## ======= STEP 2: create report directory (optional) ==========
dir.create(opt$report_dir)

## == STEP 3: copy index.Rmd and _site.yml to job working directory ==
file.copy(opt$index_rmd, "index.Rmd")
file.copy(opt$site_yml, "_site.yml")

## = STEP 4: replace placeholders in .Rmd files with argument values =

# Copy one .Rmd template into the working directory with its placeholders
# filled in.
#
# template_path: path of the template to read.
# output_path:   path of the filled-in copy to write.
# substitutions: named list; each name is a placeholder token and each value
#                is the text that replaces it. Replacements are applied in
#                list order.
# Returns output_path, invisibly.
fill_template <- function(template_path, output_path, substitutions) {
  template_lines <- readLines(template_path)
  for (placeholder in names(substitutions)) {
    # fixed = TRUE: placeholders are literal tokens, not regular expressions.
    template_lines <- gsub(
      placeholder, substitutions[[placeholder]], template_lines,
      fixed = TRUE
    )
  }
  writeLines(template_lines, con = output_path)
  invisible(output_path)
}

# 01_evaluation_overview.Rmd uses the full set of input placeholders.
fill_template(
  opt$x01_evaluation_overview,
  "x01_evaluation_overview.Rmd",
  list(
    ECHO         = opt$echo,
    READS_1      = opt$reads_1,
    NAME_1       = opt$name_1,
    READS_2      = opt$reads_2,
    # Previously filled with opt$name_1, so the second read set was labelled
    # with the first one's name.
    NAME_2       = opt$name_2,
    CONTAMINANTS = opt$contaminants,
    LIMITS       = opt$limits,
    REPORT_DIR   = opt$report_dir
  )
)

# 02 ... 11: the per-module pages only need ECHO and REPORT_DIR. Each output
# file is named after its command line option.
module_templates <- c(
  "x02_per_base_sequence_quality",
  "x03_per_tile_sequence_quality",
  "x04_per_sequence_quality_score",
  "x05_per_base_sequence_content",
  "x06_per_sequence_gc_content",
  "x07_per_base_n_content",
  "x08_sequence_length_distribution",
  "x09_sequence_duplication_levels",
  "x10_adapter_content",
  "x11_kmer_content"
)
for (template_name in module_templates) {
  fill_template(
    opt[[template_name]],
    paste0(template_name, ".Rmd"),
    list(ECHO = opt$echo, REPORT_DIR = opt$report_dir)
  )
}

## ============= STEP 5: render all .Rmd templates =================

# Read one module's table out of a FastQC data file.
#
# The module is taken to start on the first line matching `module_name` and
# to end on the next line matching 'END_MODULE'; the lines strictly between
# the two are parsed as tab-separated values.
#
# fastqc_data:  path of the FastQC data file.
# module_name:  regular expression identifying the module's first line.
# header:       passed to read.csv(); whether the first extracted line holds
#               column names.
# comment.char: passed to read.csv(); the default "" disables comment
#               handling.
# Returns a data frame (an empty one when the module contains no lines).
# Stops with an error when the module or its END_MODULE line is not found.
extract_data_module <- function(fastqc_data, module_name, header = TRUE,
                                comment.char = "") {
  f <- readLines(fastqc_data)

  # Only the first match is used, so a pattern that matches several lines
  # cannot produce a vector-valued start.
  start_line <- grep(module_name, f)[1]
  if (is.na(start_line)) {
    stop("module '", module_name, "' not found in ", fastqc_data,
         call. = FALSE)
  }

  end_module_lines <- grep("END_MODULE", f)
  end_line <- end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop("no END_MODULE line after module '", module_name, "' in ",
         fastqc_data, call. = FALSE)
  }

  # seq_len() instead of (start + 1):(end - 1): the colon form counts
  # backwards when the module is empty.
  n_lines <- end_line - start_line - 1
  if (n_lines < 1) {
    return(data.frame())
  }
  module_data <- f[start_line + seq_len(n_lines)]

  # Parsed from memory rather than via a scratch 'temp.txt' in the working
  # directory.
  read.csv(text = module_data, sep = "\t", header = header,
           comment.char = comment.char)
}

render_site()

## ============= STEP 6: manipulate outputs ====================
# overwrite = TRUE: file.copy() otherwise refuses to replace an existing
# destination file and only signals that through its return value.
file.copy("my_site/index.html", opt$report_html,
          overwrite = TRUE, recursive = TRUE)

# The glob is left unquoted for the shell to expand; the destination is
# quoted so paths containing spaces or shell metacharacters stay intact.
copy_status <- system(paste0("cp -r my_site/* ", shQuote(opt$report_dir)))
if (copy_status != 0) {
  warning("copying my_site/ to ", opt$report_dir,
          " failed with exit status ", copy_status, call. = FALSE)
}

## -------- end of code rendering .Rmd templates ----------------

## =========== End of sinking output ============================
# Restore the message stream first, then the output stream, then release
# the log file.
sink(type = "message")
sink()
close(zz)