annotate 09_sequence_duplication_levels.Rmd @ 12:68ea2ebbf866 draft

add boxplot for per base sequence quality
author mingchen0919
date Thu, 09 Nov 2017 09:23:43 -0500
parents 507eec497730
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
1 ---
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
2 title: 'Sequence Duplication Levels'
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
3 output:
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
4 html_document:
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
5 number_sections: true
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
6 toc: true
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
7 theme: cosmo
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
8 highlight: tango
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
9 ---
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
10
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
## Document-wide chunk defaults.
## NOTE(review): ECHO is not defined anywhere in this file; presumably it is a
## placeholder replaced with TRUE/FALSE before the document is knitted -- confirm.
## error = TRUE makes a failing chunk print its error in the report rather
## than stopping the render.
knitr::opts_chunk$set(echo = ECHO, error = TRUE)
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
16 ```
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
17
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
18 ### Sequence Duplication Levels
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
19
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
20 ```{r 'Sequence Duplication Levels', fig.width=10}
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
## Compare the FastQC 'Sequence Duplication Levels' module of two reports
## (faceted as 'before' and 'after') and render the result as an interactive
## plot.
## NOTE(review): 'REPORT_DIR' looks like a placeholder that is substituted
## before knitting -- confirm against the rendering script.

## reads 1 (shown in the 'before' facet)
sdl_1 = extract_data_module(
  'REPORT_DIR/reads_1_fastqc_data.txt',
  'Sequence Duplication Levels',
  header = FALSE,
  comment.char = '#'
)
names(sdl_1) = c(
  'Duplication_Level',
  'Percentage_of_deduplicated',
  'Percentage_of_total'
)
## Numeric row position used as the x coordinate; the duplication-level text
## is attached later as tick labels. seq_along() instead of 1:length() so an
## empty module yields an empty index rather than the two-element c(1, 0).
sdl_1$id = seq_along(sdl_1$Duplication_Level)

## Long format: the two percentage columns are stacked into variable/value.
melt_sdl_1 = melt(sdl_1, id.vars = c('Duplication_Level', 'id'))
melt_sdl_1$trim = 'before'


## reads 2 (shown in the 'after' facet)
sdl_2 = extract_data_module(
  'REPORT_DIR/reads_2_fastqc_data.txt',
  'Sequence Duplication Levels',
  header = FALSE,
  comment.char = '#'
)
names(sdl_2) = c(
  'Duplication_Level',
  'Percentage_of_deduplicated',
  'Percentage_of_total'
)
sdl_2$id = seq_along(sdl_2$Duplication_Level)

melt_sdl_2 = melt(sdl_2, id.vars = c('Duplication_Level', 'id'))
melt_sdl_2$trim = 'after'

## Stack both reports; fix the facet order so 'before' is drawn left of 'after'.
comb_sdl = rbind(melt_sdl_1, melt_sdl_2)
comb_sdl$trim = factor(comb_sdl$trim, levels = c('before', 'after'))

## One line per percentage type, one facet per report.
## NOTE(review): tick positions and labels are taken from sdl_2 only; this
## assumes both reports list the same duplication levels in the same order --
## confirm.
p = ggplot(data = comb_sdl, aes(x = id, y = value, color = variable)) +
  geom_line() +
  scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) +
  facet_grid(. ~ trim) +
  xlab('Sequence Duplication Level') +
  ylab('') +
  theme(axis.text.x = element_text(angle=45))
ggplotly(p)
507eec497730 update fastqc site
mingchen0919
parents:
diff changeset
49 ```