comparison mutational_patterns.R @ 8:e0dad46148bf draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit 75fd87e806f9bee2f6ff7fcd3834e55eb21d8710"
author artbio
date Mon, 19 Oct 2020 07:47:24 +0000
parents 34626b2b9907
children 4ff3c984523b
comparison
equal deleted inserted replaced
7:34626b2b9907 8:e0dad46148bf
108 library(ref_genome, character.only = TRUE) 108 library(ref_genome, character.only = TRUE)
109 library(ggplot2) 109 library(ggplot2)
110 110
111 # Load the VCF files into a GRangesList: 111 # Load the VCF files into a GRangesList:
112 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) 112 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome)
113 library(plyr)
113 if (!is.na(opt$levels)[1]) { # manage levels if there are 114 if (!is.na(opt$levels)[1]) { # manage levels if there are
114 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("element_identifier","level")) 115 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("element_identifier","level"))
115 library(plyr) 116 } else {
116 metadata_table <- join(vcf_table, levels_table, by = "element_identifier") 117 levels_table <- data.frame(element_identifier=vcf_table$element_identifier,
117 tissue <- as.vector(metadata_table$level) 118 level=rep("nolabels", length(vcf_table$element_identifier)))
118 } 119 }
120 metadata_table <- join(vcf_table, levels_table, by = "element_identifier")
121 tissue <- as.vector(metadata_table$level)
122 detach(package:plyr)
119 123
120 ##### This is done for any section ###### 124 ##### This is done for any section ######
121 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) 125 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome)
122 qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] 126 qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
123 col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) 127 col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))))
128 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) 132 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69)
129 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) 133 type_occurrences <- mut_type_occurrences(vcfs, ref_genome)
130 134
131 # mutation spectrum, total or by sample 135 # mutation spectrum, total or by sample
132 136
133 if (is.na(opt$levels)[1]) { 137 if (length(levels(factor(levels_table$level))) == 1) { # (is.na(opt$levels)[1])
134 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE) 138 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE)
135 plot(p1) 139 plot(p1)
136 } else { 140 } else {
137 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE) # by levels 141 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE) # by levels
138 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total 142 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total
285 facet_grid(~level, scales = "free_x", space="free") 289 facet_grid(~level, scales = "free_x", space="free")
286 } 290 }
287 # Combine the two plots: 291 # Combine the two plots:
288 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3))) 292 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3)))
289 293
290 # Select signatures with some contribution (above a threshold) 294 #### pie charts of comic signatures contributions in samples ###
291 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1]
292 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded
293
294 ## pie charts of comic signatures contributions in samples
295 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,])))
296 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie))
297 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100
298 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents)
299 library(reshape2) 295 library(reshape2)
300 melted_sig_data_pie_percents <-melt(data=sig_data_pie_percents) 296 library(dplyr)
301 melted_sig_data_pie_percents$label <- sub("Sig.", "", melted_sig_data_pie_percents$variable) 297 if (length(levels(factor(levels_table$level))) < 2) {
302 melted_sig_data_pie_percents$pos <- cumsum(melted_sig_data_pie_percents$value) - melted_sig_data_pie_percents$value/2 298 fit_res_contrib <- as.data.frame(fit_res$contribution)
303 p7 <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) + 299 worklist <- cbind(signature=rownames(fit_res$contribution),
304 geom_bar(width = 1, stat = "identity") + 300 level=rep("nolabels", length(fit_res_contrib[,1])),
305 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + 301 fit_res_contrib,
306 coord_polar("y", start=0) + facet_wrap(~ sample) + 302 sum=rowSums(fit_res_contrib))
307 labs(x="", y="Samples", fill = cosmic_tag) + 303 worklist <- worklist[order(worklist[ ,"sum"], decreasing = T), ]
308 scale_fill_manual(name = paste0(length(select), " most contributing\nSignatures"), values = cosmic_colors[select]) 304 worklist <- worklist[1:opt$signum,]
309 theme(axis.text = element_blank(), 305 worklist <- worklist[ , -length(worklist[1,])]
310 axis.ticks = element_blank(), 306 worklist <- melt(worklist)
311 panel.grid = element_blank()) 307 worklist <- worklist[,c(1,3,4,2)]
308 } else {
309 worklist <- list()
310 for (i in levels(factor(levels_table$level))) {
311 fit_res$contribution[,levels_table$element_identifier[levels_table$level == i]] -> worklist[[i]]
312 sum <- rowSums(as.data.frame(worklist[[i]]))
313 worklist[[i]] <- cbind(worklist[[i]], sum)
314 worklist[[i]] <- worklist[[i]][order(worklist[[i]][ ,"sum"], decreasing = T), ]
315 worklist[[i]] <- worklist[[i]][1:opt$signum,]
316 worklist[[i]] <- worklist[[i]][ , -length(as.data.frame(worklist[[i]]))]
317 }
318 worklist <- as.data.frame(melt(worklist))
319 }
320
321 colnames(worklist) <- c("signature", "sample", "value", "level")
322 print(worklist)
323 worklist <- as.data.frame(worklist %>% group_by(sample) %>% mutate(value=value/sum(value)*100))
324 worklist$pos <- cumsum(worklist$value) - worklist$value/2
325 worklist$label <- factor(worklist$signature)
326 worklist$signature <- factor(worklist$signature)
327 p7 <- ggplot(worklist, aes(x="", y=value, group=signature, fill=signature)) +
328 geom_bar(width = 1, stat = "identity") +
329 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) +
330 coord_polar("y", start=0) + facet_wrap(~ sample) +
331 labs(x="", y="Samples", fill = cosmic_tag) +
332 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"),
333 values = cosmic_colors[as.numeric(levels(factor(worklist$label)))]) +
334 theme(axis.text = element_blank(),
335 axis.ticks = element_blank(),
336 panel.grid = element_blank())
312 grid.arrange(p7) 337 grid.arrange(p7)
313 338
314 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering 339 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering
315 if (length(vcf_paths) > 1) { 340 if (length(vcf_paths) > 1) {
316 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") 341 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete")