# HG changeset patch # User artbio # Date 1601926182 0 # Node ID 7ba08c82688878a18dcd56e15671e6709efcad56 # Parent e332cf9dfa06cbf6f2a762aafd49bd9e4f053fbd "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit e2d6ed12516e1bd24071962a0dfe0220cc348f3c" diff -r e332cf9dfa06 -r 7ba08c826888 mutational_patterns.R --- a/mutational_patterns.R Thu Sep 24 01:32:52 2020 +0000 +++ b/mutational_patterns.R Mon Oct 05 19:29:42 2020 +0000 @@ -7,6 +7,8 @@ library(rjson) library(grid) library(gridExtra) +library(scales) +library(RColorBrewer) # Arguments option_list = list( @@ -117,6 +119,9 @@ ##### This is done for any section ###### mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) +qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] +col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) +col_vector <- col_vector[c(-32, -34, -39)] # 67-color palette ###### Section 1 Mutation characteristics and spectrums ############# if (!is.na(opt$output_spectrum)[1]) { @@ -204,6 +209,7 @@ cancer_signatures = as.matrix(cancer_signatures[,4:33]) colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels cosmic_tag <- "Signatures (Cosmic v2, March 2015)" + cosmic_colors <- col_vector[1:30] } else { sp_url <- "https://raw.githubusercontent.com/ARTbio/startbio/master/sigProfiler_SBS_signatures_2019_05_22.tsv" cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) @@ -213,6 +219,7 @@ cancer_signatures = as.matrix(cancer_signatures[,4:70]) colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels cosmic_tag <- "Signatures (Cosmic v3, May 2019)" + cosmic_colors <- col_vector[1:67] } # Plot mutational profiles of the COSMIC signatures @@ -220,47 +227,34 @@ p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) grid.arrange(p6, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3))) } else { - print(length(cancer_signatures)) p6 <- plot_96_profile(cancer_signatures[,1:33], condensed = TRUE, ymax = 0.3) p6bis <- plot_96_profile(cancer_signatures[,34:67], condensed = TRUE, ymax = 0.3) grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",gp=gpar(fontsize=12,font=3))) grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",gp=gpar(fontsize=12,font=3))) } - # Hierarchically cluster the COSMIC signatures based on their similarity with average linkage - # hclust_cosmic = cluster_signatures(cancer_signatures, method = "average") - # store signatures in new order - # cosmic_order = colnames(cancer_signatures)[hclust_cosmic$order] - # plot(hclust_cosmic) - - # Similarity between mutational profiles and COSMIC signatures - - - # The similarity between each mutational profile and each COSMIC signature, can be calculated - # with cos_sim_matrix, and visualized with plot_cosine_heatmap. The cosine similarity reflects - # how well each mutational profile can be explained by each signature individually. The advantage - # of this heatmap representation is that it shows in a glance the similarity in mutational - # profiles between samples, while at the same time providing information on which signatures - # are most prominent. The samples can be hierarchically clustered in plot_cosine_heatmap. - # The cosine similarity between two mutational profiles/signatures can be calculated with cos_sim : - # cos_sim(mut_mat[,1], cancer_signatures[,1]) - - # Calculate pairwise cosine similarity between mutational profiles and COSMIC signatures - - # cos_sim_samples_signatures = cos_sim_matrix(mut_mat, cancer_signatures) - # Plot heatmap with specified signature order - # p.trans <- plot_cosine_heatmap(cos_sim_samples_signatures, col_order = cosmic_order, cluster_rows = TRUE) - # grid.arrange(p.trans) # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures) - # Select signatures with some contribution (above a threshold) - threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] - select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded # Plot contribution barplots - pc3 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "absolute") - pc4 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "relative") + pc3 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "absolute") + pc4 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "relative") + pc3_data <- pc3$data + pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + + geom_bar(stat="identity", position='stack') + + coord_flip() + + scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + + labs(x = "Samples", y = "Absolute contribution") + theme_bw() + + theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right") + pc4_data <- pc4$data + pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + + geom_bar(stat="identity", position='fill') + + coord_flip() + + scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + + scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + + labs(x = "Samples", y = "Relative contribution") + theme_bw() + + theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right") ##### # ggplot2 alternative if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs @@ -268,24 +262,28 @@ pc3_data <- merge (pc3_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + geom_bar(stat="identity", position='stack') + - scale_fill_discrete(name="Cosmic\nSignature") + + scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + labs(x = "Samples", y = "Absolute contribution") + theme_bw() + - theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + + theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right") + facet_grid(~level, scales = "free_x") pc4_data <- pc4$data pc4_data <- merge (pc4_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + geom_bar(stat="identity", position='fill') + - scale_fill_discrete(name="Cosmic\nSignature") + + scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + labs(x = "Samples", y = "Relative contribution") + theme_bw() + - theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + + theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right") + facet_grid(~level, scales = "free_x") } # Combine the two plots: grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3))) + + # Select signatures with some contribution (above a threshold) + threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] + select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded + ## pie charts of comic signatures contributions in samples - sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,]))) colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie)) sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100 @@ -299,22 +297,17 @@ geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + coord_polar("y", start=0) + facet_wrap(~ sample) + labs(x="", y="Samples", fill = cosmic_tag) + + scale_fill_manual(name = paste0(length(select), " most contributing\nSignatures"), values = cosmic_colors[select]) theme(axis.text = element_blank(), axis.ticks = element_blank(), panel.grid = element_blank()) grid.arrange(p7) # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering - p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") - grid.arrange(p8) - - # Compare the reconstructed mutational profile of sample 1 with its original mutational profile - # plot_compare_profiles(mut_mat[,1], fit_res$reconstructed[,1], - # profile_names = c("Original", "Reconstructed"), - # condensed = TRUE) - - # Calculate the cosine similarity between all original and reconstructed mutational profiles with - # `cos_sim_matrix` + if (length(vcf_paths) > 1) { + p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") + grid.arrange(p8) + } # calculate all pairwise cosine similarities cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) diff -r e332cf9dfa06 -r 7ba08c826888 mutational_patterns.xml --- a/mutational_patterns.xml Thu Sep 24 01:32:52 2020 +0000 +++ b/mutational_patterns.xml Mon Oct 05 19:29:42 2020 +0000 @@ -1,4 +1,4 @@ - + from genomic variations in vcf files bioconductor-mutationalpatterns @@ -218,7 +218,7 @@ - + @@ -231,6 +231,25 @@ + + + + + + + + + + + + + + + + + + + diff -r e332cf9dfa06 -r 7ba08c826888 test-data/cosmic_output1.pdf Binary file test-data/cosmic_output1.pdf has changed diff -r e332cf9dfa06 -r 7ba08c826888 test-data/cosmic_output2.pdf Binary file test-data/cosmic_output2.pdf has changed