annotate fastqc_report_render.R @ 18:8635a4cee6dd draft

add boxplot for per base sequence quality
author mingchen0919
date Thu, 09 Nov 2017 09:22:09 -0500
parents 1710b0e874f1
children 8c79e5b7cfc0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
1 library(getopt)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
2 library(rmarkdown)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
3 library(htmltools)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
4 library(plyr)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
5 library(dplyr)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
6 library(stringr)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
7 library(highcharter)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
8 library(DT)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
9 library(reshape2)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
10 library(plotly)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
11 library(formattable)
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
12 options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
1
22cd2369354b Uploaded
mingchen0919
parents:
diff changeset
13
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
14 ##============ Sink warnings and errors to a file ==============
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
15 ## use the sink() function to wrap all code within it.
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
16 ##==============================================================
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
17 zz = file('warnings_and_errors.txt')
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
18 sink(zz)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
19 sink(zz, type = 'message')
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
20 ##---------below is the code for rendering .Rmd templates-----
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
21
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
22 ##=============STEP 1: handle command line arguments==========
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
23 ##
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
24 ##============================================================
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
25 # column 1: the long flag name
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
26 # column 2: the short flag alias. A SINGLE character string
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
27 # column 3: argument mask
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
28 # 0: no argument
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
29 # 1: argument required
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
30 # 2: argument is optional
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
31 # column 4: data type to which the flag's argument shall be cast.
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
32 # possible values: logical, integer, double, complex, character.
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
33 #-------------------------------------------------------------
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
34 #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
35 # 1. short flag alias should match the flag in the command section in the XML file.
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
36 # 2. long flag name can be any legal R variable names
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
37 # 3. two names in args_list may share a common substring, but no name should be entirely contained in another name.
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
38 # for example, if one name is "ECHO" and another is "ECHO_XXX", replacing "ECHO" with gsub() also rewrites part of "ECHO_XXX".
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
39 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
40 args_list=list()
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
41 ##------- 1. input data ---------------------
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
42 args_list$ECHO = c('echo', 'e', '1', 'character')
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
43 args_list$READS_1 = c('reads_1', 'r', '1', 'character')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
44 args_list$NAME_1 = c('name_1', 'n', '1', 'character')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
45 args_list$READS_2 = c('reads_2', 'R', '1', 'character')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
46 args_list$NAME_2 = c('name_2', 'N', '1', 'character')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
47 args_list$CONTAMINANTS = c('contaminants', 'c', '1', 'character')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
48 args_list$LIMITS = c('limits', 'l', '1', 'character')
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
49 ##--------2. output report and outputs --------------
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
50 args_list$REPORT_HTML = c('report_html', 'o', '1', 'character')
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
51 args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character')
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
52 args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character')
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
53 ##--------3. .Rmd templates in the tool directory ----------
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
54 args_list$FASTQC_REPORT_RMD = c('fastqc_report_rmd', 'p', '1', 'character')
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
55 ##-----------------------------------------------------------
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
56 opt = getopt(t(as.data.frame(args_list)))
1
22cd2369354b Uploaded
mingchen0919
parents:
diff changeset
57
22cd2369354b Uploaded
mingchen0919
parents:
diff changeset
58
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
59
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
60 ##=======STEP 2: create report directory (optional)==========
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
61 ##
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
62 ##===========================================================
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
63 dir.create(opt$report_dir)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
64
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
65 ##=STEP 3: replace placeholders in .Rmd with argument values=
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
66 ##
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
67 ##===========================================================
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
68 #++ need to replace placeholders with args values one by one+
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
69 readLines(opt$fastqc_report_rmd) %>%
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
70 (function(x) {
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
71 gsub('ECHO', opt$echo, x)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
72 }) %>%
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
73 (function(x) {
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
74 gsub('READS_1', opt$reads_1, x)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
75 }) %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
76 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
77 gsub('NAME_1', opt$name_1, x)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
78 }) %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
79 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
80 gsub('READS_2', opt$reads_2, x)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
81 }) %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
82 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
83 gsub('NAME_2', opt$name_2, x) # fill the NAME_2 placeholder from --name_2 (was opt$name_1)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
84 }) %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
85 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
86 gsub('CONTAMINANTS', opt$contaminants, x)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
87 }) %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
88 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
89 gsub('LIMITS', opt$limits, x)
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
90 }) %>%
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
91 (function(x) {
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
92 gsub('REPORT_DIR', opt$report_dir, x)
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
93 }) %>%
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
94 (function(x) {
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
95 fileConn = file('fastqc_report.Rmd')
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
96 writeLines(x, con=fileConn)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
97 close(fileConn)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
98 })
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
99
1
22cd2369354b Uploaded
mingchen0919
parents:
diff changeset
100
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
101 ##=============STEP 4: render .Rmd templates=================
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
102 ##
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
103 ##===========================================================
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
104 render('fastqc_report.Rmd', output_file = opt$report_html)
1
22cd2369354b Uploaded
mingchen0919
parents:
diff changeset
105
22cd2369354b Uploaded
mingchen0919
parents:
diff changeset
106
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
107 ##--------end of code rendering .Rmd templates----------------
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
108 sink(type = 'message'); sink() # stop diverting messages first, then remove the output diversion
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 1
diff changeset
109 ##=========== End of sinking output=============================