diff mutational_patterns.R @ 24:ca6c19ee7da0 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit bad92e3210a78b5ebf47d6950f4dba10c1cbf07d
author artbio
date Tue, 05 Jul 2022 21:41:43 +0000
parents 83f8c93c34b4
children b00fef2b1c2c
line wrap: on
line diff
--- a/mutational_patterns.R	Wed Oct 27 00:46:47 2021 +0000
+++ b/mutational_patterns.R	Tue Jul 05 21:41:43 2022 +0000
@@ -1,7 +1,8 @@
 # load packages that are provided in the conda env
-options(show.error.messages = F,
+options(show.error.messages = FALSE,
        error = function() {
-           cat(geterrmessage(), file = stderr()); q("no", 1, F)
+           cat(geterrmessage(), file = stderr())
+           q("no", 1, FALSE)
            }
         )
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
@@ -202,16 +203,19 @@
     # (For larger datasets it is wise to perform more iterations by changing the nrun parameter
     # to achieve stability and avoid local minima)
     nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun)
-    # Assign signature names
-    colnames(nmf_res$signatures) <- paste0("SBS", 1:opt$newsignum)
-    rownames(nmf_res$contribution) <- paste0("SBS", 1:opt$newsignum)
+    # Assign signature COSMICv3.2 names
+    cosmic_signatures <- get_known_signatures()
+    nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = 0.85)
+    sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures)
+    plot_cosine_sim <- plot_cosine_heatmap(sim_matrix)
+    grid.arrange(plot_cosine_sim)
     # Plot the 96-profile of the signatures:
     p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE)
     new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq")
     new_sig_matrix <- format(new_sig_matrix, scientific = TRUE)
-    newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = T),
+    newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE),
                      "[", new_sig_matrix$substitution, "]",
-                     gsub("^.\\.", "", new_sig_matrix$context, perl = T))
+                     gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE))
     new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]])
     write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t")
     grid.arrange(p5)
@@ -329,8 +333,8 @@
     fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures)
 
     # Plot contribution barplots
-    pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "absolute")
-    pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "relative")
+    pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute")
+    pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative")
     if (is.na(opt$levels)[1]) {  # if there are NO levels to display in graphs
         pc3_data <- pc3$data
         pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
@@ -397,7 +401,7 @@
                           level = rep("nolabels", length(fit_res_contrib[, 1])),
                           fit_res_contrib,
                           sum = rowSums(fit_res_contrib))
-        worklist <- worklist[order(worklist[, "sum"], decreasing = T), ]
+        worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ]
         worklist <- worklist[1:opt$signum, ]
         worklist <- worklist[, -length(worklist[1, ])]
         worklist <- melt(worklist)
@@ -405,10 +409,10 @@
     } else {
         worklist <- list()
         for (i in levels(factor(levels_table$level))) {
-             fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] -> worklist[[i]]
+             worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]]
              sum <- rowSums(as.data.frame(worklist[[i]]))
              worklist[[i]] <- cbind(worklist[[i]], sum)
-             worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = T), ]
+             worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ]
              worklist[[i]] <- worklist[[i]][1:opt$signum, ]
              worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))]
         }
@@ -424,7 +428,7 @@
     p7 <-  ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) +
               geom_bar(width = 1, stat = "identity") +
               geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) +
-              coord_polar("y", start = 0) + facet_wrap(.~sample) +
+              coord_polar("y", start = 0) + facet_wrap(. ~ sample) +
               labs(x = "", y = "Samples", fill = tag) +
               scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"),
                                 values = signature_colors[levels(worklist$signature)],
@@ -452,7 +456,7 @@
             output_table <- data.frame(sample = rownames(output_table), output_table)
             colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
         }
-        write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F)
+        write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = FALSE, row.names = FALSE)
     }
 
     # calculate all pairwise cosine similarities