Mercurial > repos > mingchen0919 > rmarkdown_fastqc_site
annotate fastqc_site_render.R @ 12:68ea2ebbf866 draft
add boxplot for per base sequence quality
author | mingchen0919 |
---|---|
date | Thu, 09 Nov 2017 09:23:43 -0500 |
parents | 507eec497730 |
children | a6f8382f852c |
rev | line source |
---|---|
11 | 1 library(getopt) |
7
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
2 library(rmarkdown) |
11 | 3 library(htmltools) |
7
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
4 library(plyr) |
11 | 5 library(dplyr) |
7
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
6 library(stringr) |
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
7 library(highcharter) |
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
8 library(DT) |
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
9 library(reshape2) |
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
10 library(plotly) |
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
11 library(formattable) |
11 | 12 options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) |
7
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
13 |
11 | 14 ##============ Sink warnings and errors to a file ============== |
15 ## use the sink() function to wrap all code within it. | |
16 ##============================================================== | |
## Log file that receives everything the script prints from here on.
zz = file('warnings_and_errors.txt')
## Divert regular output first, then the message stream, to the same file.
sink(file = zz, type = 'output')
sink(file = zz, type = 'message')
20 ##---------below is the code for rendering .Rmd templates----- | |
21 | |
22 ##=============STEP 1: handle command line arguments========== | |
23 ## | |
24 ##============================================================ | |
25 # column 1: the long flag name | |
26 # column 2: the short flag alias. A SINGLE character string | |
27 # column 3: argument mask | |
28 # 0: no argument | |
29 # 1: argument required | |
30 # 2: argument is optional | |
31 # column 4: data type to which the flag's argument shall be cast. | |
32 # possible values: logical, integer, double, complex, character. | |
33 #------------------------------------------------------------- | |
34 #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ | |
35 # 1. short flag alias should match the flag in the command section in the XML file. | |
36 # 2. long flag name can be any legal R variable names | |
37 # 3. names in args_list may share a common substring, but no name should be entirely contained in another name. | |
38 # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. | |
39 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
## Option specification for getopt(). Each entry is
## c(long flag name, short flag alias, argument mask, data type).
args_list = list(
  ##------- 1. input data ---------------------
  ECHO = c('echo', 'e', '1', 'character'),
  READS_1 = c('reads_1', 'r', '1', 'character'),
  NAME_1 = c('name_1', 'n', '1', 'character'),
  READS_2 = c('reads_2', 'R', '1', 'character'),
  NAME_2 = c('name_2', 'N', '1', 'character'),
  CONTAMINANTS = c('contaminants', 'c', '1', 'character'),
  LIMITS = c('limits', 'l', '1', 'character'),
  ##--------2. output report and outputs --------------
  REPORT_HTML = c('report_html', 'o', '1', 'character'),
  REPORT_DIR = c('report_dir', 'd', '1', 'character'),
  SINK_MESSAGE = c('sink_message', 's', '1', 'character'),
  ##--------3. .Rmd templates in the tool directory ----------
  SITE_YML = c('site_yml', 'S', '1', 'character'),
  INDEX_RMD = c('index_rmd', 'I', '1', 'character'),
  X01_EVALUATION_OVERVIEW = c('x01_evaluation_overview', 'A', '1', 'character'),
  X02_PER_BASE_SEQUENCE_QUALITY = c('x02_per_base_sequence_quality', 'B', '1', 'character'),
  X03_PER_TILE_SEQUENCE_QUALITY = c('x03_per_tile_sequence_quality', 'C', '1', 'character'),
  X04_PER_SEQUENCE_QUALITY_SCORE = c('x04_per_sequence_quality_score', 'D', '1', 'character'),
  X05_PER_BASE_SEQUENCE_CONTENT = c('x05_per_base_sequence_content', 'E', '1', 'character'),
  X06_PER_SEQUENCE_GC_CONTENT = c('x06_per_sequence_gc_content', 'F', '1', 'character'),
  X07_PER_BASE_N_CONTENT = c('x07_per_base_n_content', 'G', '1', 'character'),
  X08_SEQUENCE_LENGTH_DISTRIBUTION = c('x08_sequence_length_distribution', 'H', '1', 'character'),
  X09_SEQUENCE_DUPLICATION_LEVELS = c('x09_sequence_duplication_levels', 'J', '1', 'character'),
  X10_ADAPTER_CONTENT = c('x10_adapter_content', 'K', '1', 'character'),
  X11_KMER_CONTENT = c('x11_kmer_content', 'L', '1', 'character')
)
##-----------------------------------------------------------
## Stack the entries row-wise (one row per flag, four columns) and parse the
## command line with that specification.
opt = getopt(do.call(rbind, args_list))
7
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
69 |
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
70 |
11 | 71 |
72 ##=======STEP 2: create report directory (optional)========== | |
73 ## | |
74 ##=========================================================== | |
## Create the report directory. Missing parent directories are created as
## well, and an already existing directory is not reported as a warning.
dir.create(opt$report_dir, showWarnings = FALSE, recursive = TRUE)

##==STEP 3: copy index.Rmd and _site.yml to job working directory======
##
##=====================================================================
## Both files are copied under the fixed names used for the site build.
file.copy(opt$index_rmd, 'index.Rmd')
file.copy(opt$site_yml, '_site.yml')
82 | |
83 ##=STEP 4: replace placeholders in .Rmd files with argument values= | |
84 ## | |
85 ##================================================================= | |
86 #++ need to replace placeholders with args values one by one+ | |
87 | |
# 01_evaluation_overview.Rmd
## Replace each placeholder in the template with the matching command line
## value and write the result to the working directory.
## - Placeholders are literal tokens, so they are matched with fixed = TRUE.
##   This also keeps backslashes in an argument value from being interpreted
##   as regex back-references.
## - NAME_2 is filled from opt$name_2 (it was previously filled from
##   opt$name_1).
## local() keeps the helper variables out of the global environment.
local({
  # placeholder in the template = long flag name in opt; order is preserved
  overview_placeholders = c(
    ECHO = 'echo',
    READS_1 = 'reads_1',
    NAME_1 = 'name_1',
    READS_2 = 'reads_2',
    NAME_2 = 'name_2',
    CONTAMINANTS = 'contaminants',
    LIMITS = 'limits',
    REPORT_DIR = 'report_dir'
  )
  overview_lines = readLines(opt$x01_evaluation_overview)
  for (placeholder in names(overview_placeholders)) {
    overview_lines = gsub(
      placeholder,
      opt[[overview_placeholders[[placeholder]]]],
      overview_lines,
      fixed = TRUE
    )
  }
  writeLines(overview_lines, 'x01_evaluation_overview.Rmd')
})
119 | |
# 02_per_base_sequence_quality.Rmd ... 07_per_base_n_content.Rmd
## These templates use the same two placeholders (ECHO and REPORT_DIR), so
## they are filled in one loop. Each entry is both the long flag name holding
## the template path and the base name of the .Rmd file written to the
## working directory.
## Placeholders are literal tokens, so they are matched with fixed = TRUE;
## this also keeps backslashes in an argument value from being interpreted as
## regex back-references.
## local() keeps the loop variables out of the global environment.
local({
  template_names = c(
    'x02_per_base_sequence_quality',
    'x03_per_tile_sequence_quality',
    'x04_per_sequence_quality_score',
    'x05_per_base_sequence_content',
    'x06_per_sequence_gc_content',
    'x07_per_base_n_content'
  )
  for (template_name in template_names) {
    template_lines = readLines(opt[[template_name]])
    template_lines = gsub('ECHO', opt$echo, template_lines, fixed = TRUE)
    template_lines = gsub('REPORT_DIR', opt$report_dir, template_lines, fixed = TRUE)
    writeLines(template_lines, paste0(template_name, '.Rmd'))
  }
})
7
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
203 |
# 08_sequence_length_distribution.Rmd ... 11_kmer_content.Rmd
## These templates use the same two placeholders (ECHO and REPORT_DIR), so
## they are filled in one loop. Each entry is both the long flag name holding
## the template path and the base name of the .Rmd file written to the
## working directory.
## Placeholders are literal tokens, so they are matched with fixed = TRUE;
## this also keeps backslashes in an argument value from being interpreted as
## regex back-references.
## local() keeps the loop variables out of the global environment.
local({
  template_names = c(
    'x08_sequence_length_distribution',
    'x09_sequence_duplication_levels',
    'x10_adapter_content',
    'x11_kmer_content'
  )
  for (template_name in template_names) {
    template_lines = readLines(opt[[template_name]])
    template_lines = gsub('ECHO', opt$echo, template_lines, fixed = TRUE)
    template_lines = gsub('REPORT_DIR', opt$report_dir, template_lines, fixed = TRUE)
    writeLines(template_lines, paste0(template_name, '.Rmd'))
  }
})
259 | |
260 ##=============STEP 5: render all .Rmd templates================= | |
261 ## | |
262 ##=========================================================== | |
## Read the table of one module out of a FastQC-style data file.
##
## Arguments:
##   fastqc_data   path of the data file to read.
##   module_name   pattern (as used by grep) identifying the line that opens
##                 the module; the first matching line is used.
##   header, comment.char
##                 forwarded to read.csv().
## Value:
##   A data.frame parsed from the tab separated lines between the opening line
##   and the next line containing 'END_MODULE'. A module without any lines in
##   between yields an empty data.frame.
## Errors:
##   Stops when no line matches module_name or when no 'END_MODULE' line
##   follows the opening line.
extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
  f = readLines(fastqc_data)

  start_line = grep(module_name, f)
  if (length(start_line) == 0) {
    stop('module not found: ', module_name, call. = FALSE)
  }
  start_line = start_line[1]

  end_module_lines = grep('END_MODULE', f)
  end_line = end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop('no END_MODULE line after module: ', module_name, call. = FALSE)
  }

  # seq_len() gives an empty index for an empty module, where
  # (start_line+1):(end_line-1) would count backwards and pick wrong lines.
  module_data = f[start_line + seq_len(end_line - start_line - 1)]
  if (length(module_data) == 0) {
    return(data.frame())
  }

  # Parse straight from memory; no scratch file is left in the working directory.
  read.csv(text = module_data, sep = '\t', header = header, comment.char = comment.char)
}
## Build the site from the files prepared in the working directory.
render_site()

##=============STEP 6: manipulate outputs====================
##
##===========================================================
## The rendered pages are read from my_site/ (presumably the output directory
## configured in _site.yml -- confirm against that file).
file.copy('my_site/index.html', opt$report_html, recursive = TRUE)
## The destination is shell-quoted so that spaces or shell metacharacters in
## the path cannot split or alter the command. path.expand() resolves a
## leading '~' beforehand, because the shell no longer expands it once quoted.
system(paste0('cp -r my_site/* ', shQuote(path.expand(opt$report_dir))))
7
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
279 |
d820be692d74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
280 |
11 | 281 ##--------end of code rendering .Rmd templates---------------- |
## Undo both diversions set up at the top of the script. The message stream is
## restored first so it never points at a closed connection; then the output
## sink is removed and the log file connection is released.
sink(type = 'message')
sink()
close(zz)
283 ##=========== End of sinking output============================= |