comparison mutational_patterns.R @ 4:7ba08c826888 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit e2d6ed12516e1bd24071962a0dfe0220cc348f3c"
author artbio
date Mon, 05 Oct 2020 19:29:42 +0000
parents e332cf9dfa06
children fe31d059a482
comparison
equal deleted inserted replaced
3:e332cf9dfa06 4:7ba08c826888
5 warnings() 5 warnings()
6 library(optparse) 6 library(optparse)
7 library(rjson) 7 library(rjson)
8 library(grid) 8 library(grid)
9 library(gridExtra) 9 library(gridExtra)
10 library(scales)
11 library(RColorBrewer)
10 12
11 # Arguments 13 # Arguments
12 option_list = list( 14 option_list = list(
13 make_option( 15 make_option(
14 "--inputs", 16 "--inputs",
115 tissue <- as.vector(metadata_table$level) 117 tissue <- as.vector(metadata_table$level)
116 } 118 }
117 119
118 ##### This is done for any section ###### 120 ##### This is done for any section ######
119 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) 121 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome)
122 qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
123 col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))))
124 col_vector <- col_vector[c(-32, -34, -39)] # 67-color palette
120 125
121 ###### Section 1 Mutation characteristics and spectrums ############# 126 ###### Section 1 Mutation characteristics and spectrums #############
122 if (!is.na(opt$output_spectrum)[1]) { 127 if (!is.na(opt$output_spectrum)[1]) {
123 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) 128 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69)
124 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) 129 type_occurrences <- mut_type_occurrences(vcfs, ref_genome)
202 cancer_signatures = cancer_signatures[as.vector(new_order),] 207 cancer_signatures = cancer_signatures[as.vector(new_order),]
203 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type 208 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type
204 cancer_signatures = as.matrix(cancer_signatures[,4:33]) 209 cancer_signatures = as.matrix(cancer_signatures[,4:33])
205 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels 210 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels
206 cosmic_tag <- "Signatures (Cosmic v2, March 2015)" 211 cosmic_tag <- "Signatures (Cosmic v2, March 2015)"
212 cosmic_colors <- col_vector[1:30]
207 } else { 213 } else {
208 sp_url <- "https://raw.githubusercontent.com/ARTbio/startbio/master/sigProfiler_SBS_signatures_2019_05_22.tsv" 214 sp_url <- "https://raw.githubusercontent.com/ARTbio/startbio/master/sigProfiler_SBS_signatures_2019_05_22.tsv"
209 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) 215 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE)
210 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) 216 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type)
211 cancer_signatures = cancer_signatures[as.vector(new_order),] 217 cancer_signatures = cancer_signatures[as.vector(new_order),]
212 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type 218 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type
213 cancer_signatures = as.matrix(cancer_signatures[,4:70]) 219 cancer_signatures = as.matrix(cancer_signatures[,4:70])
214 colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels 220 colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels
215 cosmic_tag <- "Signatures (Cosmic v3, May 2019)" 221 cosmic_tag <- "Signatures (Cosmic v3, May 2019)"
222 cosmic_colors <- col_vector[1:67]
216 } 223 }
217 224
218 # Plot mutational profiles of the COSMIC signatures 225 # Plot mutational profiles of the COSMIC signatures
219 if (opt$cosmic_version == "v2") { 226 if (opt$cosmic_version == "v2") {
220 p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) 227 p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3)
221 grid.arrange(p6, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3))) 228 grid.arrange(p6, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3)))
222 } else { 229 } else {
223 print(length(cancer_signatures))
224 p6 <- plot_96_profile(cancer_signatures[,1:33], condensed = TRUE, ymax = 0.3) 230 p6 <- plot_96_profile(cancer_signatures[,1:33], condensed = TRUE, ymax = 0.3)
225 p6bis <- plot_96_profile(cancer_signatures[,34:67], condensed = TRUE, ymax = 0.3) 231 p6bis <- plot_96_profile(cancer_signatures[,34:67], condensed = TRUE, ymax = 0.3)
226 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",gp=gpar(fontsize=12,font=3))) 232 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",gp=gpar(fontsize=12,font=3)))
227 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",gp=gpar(fontsize=12,font=3))) 233 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",gp=gpar(fontsize=12,font=3)))
228 } 234 }
229 235
230 # Hierarchically cluster the COSMIC signatures based on their similarity with average linkage
231 # hclust_cosmic = cluster_signatures(cancer_signatures, method = "average")
232 # store signatures in new order
233 # cosmic_order = colnames(cancer_signatures)[hclust_cosmic$order]
234 # plot(hclust_cosmic)
235
236 # Similarity between mutational profiles and COSMIC signatures
237
238
239 # The similarity between each mutational profile and each COSMIC signature, can be calculated
240 # with cos_sim_matrix, and visualized with plot_cosine_heatmap. The cosine similarity reflects
241 # how well each mutational profile can be explained by each signature individually. The advantage
242 # of this heatmap representation is that it shows in a glance the similarity in mutational
243 # profiles between samples, while at the same time providing information on which signatures
244 # are most prominent. The samples can be hierarchically clustered in plot_cosine_heatmap.
245 # The cosine similarity between two mutational profiles/signatures can be calculated with cos_sim :
246 # cos_sim(mut_mat[,1], cancer_signatures[,1])
247
248 # Calculate pairwise cosine similarity between mutational profiles and COSMIC signatures
249
250 # cos_sim_samples_signatures = cos_sim_matrix(mut_mat, cancer_signatures)
251 # Plot heatmap with specified signature order
252 # p.trans <- plot_cosine_heatmap(cos_sim_samples_signatures, col_order = cosmic_order, cluster_rows = TRUE)
253 # grid.arrange(p.trans)
254 236
255 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles 237 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles
256 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures) 238 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures)
257 239
258 # Select signatures with some contribution (above a threshold)
259 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1]
260 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded
261 # Plot contribution barplots 240 # Plot contribution barplots
262 pc3 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "absolute") 241 pc3 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "absolute")
263 pc4 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "relative") 242 pc4 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "relative")
243 pc3_data <- pc3$data
244 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) +
245 geom_bar(stat="identity", position='stack') +
246 coord_flip() +
247 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) +
248 labs(x = "Samples", y = "Absolute contribution") + theme_bw() +
249 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right")
250 pc4_data <- pc4$data
251 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) +
252 geom_bar(stat="identity", position='fill') +
253 coord_flip() +
254 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) +
255 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
256 labs(x = "Samples", y = "Relative contribution") + theme_bw() +
257 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right")
264 ##### 258 #####
265 # ggplot2 alternative 259 # ggplot2 alternative
266 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs 260 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs
267 pc3_data <- pc3$data 261 pc3_data <- pc3$data
268 pc3_data <- merge (pc3_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") 262 pc3_data <- merge (pc3_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier")
269 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + 263 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) +
270 geom_bar(stat="identity", position='stack') + 264 geom_bar(stat="identity", position='stack') +
271 scale_fill_discrete(name="Cosmic\nSignature") + 265 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) +
272 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + 266 labs(x = "Samples", y = "Absolute contribution") + theme_bw() +
273 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + 267 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right") +
274 facet_grid(~level, scales = "free_x") 268 facet_grid(~level, scales = "free_x")
275 pc4_data <- pc4$data 269 pc4_data <- pc4$data
276 pc4_data <- merge (pc4_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") 270 pc4_data <- merge (pc4_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier")
277 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + 271 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) +
278 geom_bar(stat="identity", position='fill') + 272 geom_bar(stat="identity", position='fill') +
279 scale_fill_discrete(name="Cosmic\nSignature") + 273 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) +
280 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + 274 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
281 labs(x = "Samples", y = "Relative contribution") + theme_bw() + 275 labs(x = "Samples", y = "Relative contribution") + theme_bw() +
282 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + 276 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right") +
283 facet_grid(~level, scales = "free_x") 277 facet_grid(~level, scales = "free_x")
284 } 278 }
285 # Combine the two plots: 279 # Combine the two plots:
286 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3))) 280 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3)))
281
282 # Select signatures with some contribution (above a threshold)
283 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1]
284 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded
285
287 ## pie charts of comic signatures contributions in samples 286 ## pie charts of comic signatures contributions in samples
288
289 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,]))) 287 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,])))
290 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie)) 288 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie))
291 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100 289 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100
292 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents) 290 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents)
293 library(reshape2) 291 library(reshape2)
297 p7 <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) + 295 p7 <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) +
298 geom_bar(width = 1, stat = "identity") + 296 geom_bar(width = 1, stat = "identity") +
299 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + 297 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) +
300 coord_polar("y", start=0) + facet_wrap(~ sample) + 298 coord_polar("y", start=0) + facet_wrap(~ sample) +
301 labs(x="", y="Samples", fill = cosmic_tag) + 299 labs(x="", y="Samples", fill = cosmic_tag) +
300 scale_fill_manual(name = paste0(length(select), " most contributing\nSignatures"), values = cosmic_colors[select])
302 theme(axis.text = element_blank(), 301 theme(axis.text = element_blank(),
303 axis.ticks = element_blank(), 302 axis.ticks = element_blank(),
304 panel.grid = element_blank()) 303 panel.grid = element_blank())
305 grid.arrange(p7) 304 grid.arrange(p7)
306 305
307 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering 306 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering
308 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") 307 if (length(vcf_paths) > 1) {
309 grid.arrange(p8) 308 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete")
310 309 grid.arrange(p8)
311 # Compare the reconstructed mutational profile of sample 1 with its original mutational profile 310 }
312 # plot_compare_profiles(mut_mat[,1], fit_res$reconstructed[,1],
313 # profile_names = c("Original", "Reconstructed"),
314 # condensed = TRUE)
315
316 # Calculate the cosine similarity between all original and reconstructed mutational profiles with
317 # `cos_sim_matrix`
318 311
319 # calculate all pairwise cosine similarities 312 # calculate all pairwise cosine similarities
320 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) 313 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed)
321 # extract cosine similarities per sample between original and reconstructed 314 # extract cosine similarities per sample between original and reconstructed
322 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec)) 315 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec))