annotate 02_per_base_sequence_quality.Rmd @ 12:68ea2ebbf866 draft

add boxplot for per base sequence quality
author mingchen0919
date Thu, 09 Nov 2017 09:23:43 -0500
parents 507eec497730
children a6bf4dfca096
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
1 ---
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
2 title: 'Per base sequence quality'
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
3 output:
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
4 html_document:
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
5 number_sections: true
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
6 toc: true
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
7 theme: cosmo
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
8 highlight: tango
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
9 ---
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
10
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
12 knitr::opts_chunk$set(
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
13 echo = ECHO,
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
14 error = TRUE
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
15 )
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
16 ```
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
17
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
18 ### Per base sequence quality
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
19
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
20 ```{r 'per base sequence quality', fig.width=10}
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
21 ## reads 1
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
22 pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality')
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
23 pbsq_1$id = seq_along(pbsq_1$X.Base)  # row index used as the numeric x position; seq_along() is safe when the module is empty
12
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
24 pbsq_1$trim = 'before'
11
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
25
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
26 ## reads 2
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
27 pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality')
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
28 pbsq_2$id = seq_along(pbsq_2$X.Base)  # row index used as the numeric x position; seq_along() is safe when the module is empty
12
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
29 pbsq_2$trim = 'after'
11
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
30
12
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
31 comb_pbsq = rbind(pbsq_1, pbsq_2)
11
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
32 comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim)
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
33
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
34 p = ggplot(data = comb_pbsq) +
12
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
35 geom_boxplot(mapping = aes(x = id,
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
36 lower = Lower.Quartile,
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
37 upper = Upper.Quartile,
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
38 middle = Median,
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
39 ymin = X10th.Percentile,
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
40 ymax = X90th.Percentile,
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
41 fill = "yellow"),
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
42 stat = 'identity') +
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
43 geom_line(mapping = aes(x = id, y = Mean, color = "red")) +
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
44 scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
45 scale_fill_identity() +
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
46 scale_color_identity() +
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
47 ylim(0, max(comb_pbsq$X90th.Percentile) + 5) +  # upper limit must cover the whisker top (ymax), not just the box top
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
48 facet_grid(. ~ trim) +
11
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
49 theme(axis.text.x = element_text(angle=45))
12
68ea2ebbf866 add boxplot for per base sequence quality
mingchen0919
parents: 11
diff changeset
50 p
11
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
51
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
52 ```