comparison 02_per_base_sequence_quality.Rmd @ 12:68ea2ebbf866 draft

add boxplot for per base sequence quality
author mingchen0919
date Thu, 09 Nov 2017 09:23:43 -0500
parents 507eec497730
children a6bf4dfca096
comparison
equal deleted inserted replaced
11:507eec497730 12:68ea2ebbf866
19 19
20 ```{r 'per base sequence quality', fig.width=10} 20 ```{r 'per base sequence quality', fig.width=10}
21 ## reads 1 21 ## reads 1
22 pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality') 22 pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality')
23 pbsq_1$id = 1:length(pbsq_1$X.Base) 23 pbsq_1$id = 1:length(pbsq_1$X.Base)
24 24 pbsq_1$trim = 'before'
25 melt_pbsq_1 = filter(melt(pbsq_1, id=c('X.Base', 'id')), variable == 'Mean')
26 melt_pbsq_1$trim = 'before'
27
28 25
29 ## reads 2 26 ## reads 2
30 pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality') 27 pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality')
31 pbsq_2$id = 1:length(pbsq_2$X.Base) 28 pbsq_2$id = 1:length(pbsq_2$X.Base)
29 pbsq_2$trim = 'after'
32 30
33 melt_pbsq_2 = filter(melt(pbsq_2, id=c('X.Base', 'id')), variable == 'Mean') 31 comb_pbsq = rbind(pbsq_1, pbsq_2)
34 melt_pbsq_2$trim = 'after'
35
36 comb_pbsq = rbind(melt_pbsq_1, melt_pbsq_2)
37 comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim) 32 comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim)
38 33
39 p = ggplot(data = comb_pbsq) + 34 p = ggplot(data = comb_pbsq) +
40 geom_line(mapping = aes(x = id, y = value, group = variable, color = variable)) + 35 geom_boxplot(mapping = aes(x = id,
41 scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) + 36 lower = Lower.Quartile,
42 facet_grid(. ~ trim) + 37 upper = Upper.Quartile,
43 ylim(0, max(comb_pbsq$value) + 5) + 38 middle = Median,
39 ymin = X10th.Percentile,
40 ymax = X90th.Percentile,
41 fill = "yellow"),
42 stat = 'identity') +
43 geom_line(mapping = aes(x = id, y = Mean, color = "red")) +
44 scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
45 scale_fill_identity() +
46 scale_color_identity() +
47 ylim(0, max(comb_pbsq$Upper.Quartile) + 5) +
48 facet_grid(. ~ trim) +
44 theme(axis.text.x = element_text(angle=45)) 49 theme(axis.text.x = element_text(angle=45))
45 ggplotly(p) 50 p
46 51
47 ``` 52 ```