comparison fastqc_report.Rmd @ 18:8635a4cee6dd draft

add boxplot for per base sequence quality
author mingchen0919
date Thu, 09 Nov 2017 09:22:09 -0500
parents ac5c618e4d97
children 8c79e5b7cfc0
comparison
equal deleted inserted replaced
17:ac5c618e4d97 18:8635a4cee6dd
137 137
138 ```{r 'per base sequence quality', fig.width=10} 138 ```{r 'per base sequence quality', fig.width=10}
139 ## reads 1 139 ## reads 1
140 pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality') 140 pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality')
141 pbsq_1$id = 1:length(pbsq_1$X.Base) 141 pbsq_1$id = 1:length(pbsq_1$X.Base)
142 142 pbsq_1$trim = 'before'
143 melt_pbsq_1 = filter(melt(pbsq_1, id=c('X.Base', 'id')), variable == 'Mean')
144 melt_pbsq_1$trim = 'before'
145
146 143
147 ## reads 2 144 ## reads 2
148 pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality') 145 pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality')
149 pbsq_2$id = 1:length(pbsq_2$X.Base) 146 pbsq_2$id = 1:length(pbsq_2$X.Base)
150 147 pbsq_2$trim = 'after'
151 melt_pbsq_2 = filter(melt(pbsq_2, id=c('X.Base', 'id')), variable == 'Mean') 148
152 melt_pbsq_2$trim = 'after' 149 comb_pbsq = rbind(pbsq_1, pbsq_2)
153
154 comb_pbsq = rbind(melt_pbsq_1, melt_pbsq_2)
155 comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim) 150 comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim)
156 151
157 p = ggplot(data = comb_pbsq) + 152 p = ggplot(data = comb_pbsq) +
158 geom_line(mapping = aes(x = id, y = value, group = variable, color = variable)) + 153 geom_boxplot(mapping = aes(x = id,
159 scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) + 154 lower = Lower.Quartile,
160 facet_grid(. ~ trim) + 155 upper = Upper.Quartile,
161 ylim(0, max(comb_pbsq$value) + 5) + 156 middle = Median,
157 ymin = X10th.Percentile,
158 ymax = X90th.Percentile,
159 fill = "yellow"),
160 stat = 'identity') +
161 geom_line(mapping = aes(x = id, y = Mean, color = "red")) +
162 scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
163 scale_fill_identity() +
164 scale_color_identity() +
165 ylim(0, max(comb_pbsq$Upper.Quartile) + 5) +
166 facet_grid(. ~ trim) +
162 theme(axis.text.x = element_text(angle=45)) 167 theme(axis.text.x = element_text(angle=45))
163 ggplotly(p) 168 p
164 169
165 ``` 170 ```
166 171
167 ### Per tile sequence quality 172 ### Per tile sequence quality
168 173