changeset 28:9a33a5a90a2c draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/mutational_patterns commit c55e91bc6716a71ee3a3f30da3f4c915f465c1d4
author artbio
date Sun, 11 Feb 2024 01:12:41 +0000
parents 2c9759c7ba72
children
files mutational_patterns.R mutational_patterns.xml
diffstat 2 files changed, 298 insertions(+), 249 deletions(-) [+]
line wrap: on
line diff
--- a/mutational_patterns.R	Sat Feb 10 17:45:11 2024 +0000
+++ b/mutational_patterns.R	Sun Feb 11 01:12:41 2024 +0000
@@ -1,10 +1,11 @@
 # load packages that are provided in the conda env
-options(show.error.messages = FALSE,
-       error = function() {
-           cat(geterrmessage(), file = stderr())
-           q("no", 1, FALSE)
-           }
-        )
+options(
+    show.error.messages = FALSE,
+    error = function() {
+        cat(geterrmessage(), file = stderr())
+        q("no", 1, FALSE)
+    }
+)
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 warnings()
 library(optparse)
@@ -16,124 +17,125 @@
 
 # Arguments
 option_list <- list(
-  make_option(
-    "--inputs",
-    default = NA,
-    type = "character",
-    help = "json formatted dictionary of datasets and their paths"
-  ),
-  make_option(
-    "--genome",
-    default = NA,
-    type = "character",
-    help = "genome name in the BSgenome bioconductor package"
-  ),
-  make_option(
-    "--levels",
-    default = NA,
-    type = "character",
-    help = "path to the tab separated file describing the levels in function of datasets"
-  ),
-  make_option(
-    "--cosmic_version",
-    default = NA,
-    type = "character",
-    help = "Version of the Cosmic Signature set to be used to express mutational profiles"
-  ),
-  make_option(
-    "--own_signatures",
-    default = NA,
-    type = "character",
-    help = "Path to the user-defined signature matrix"
-  ),
-  make_option(
-    "--signum",
-    default = 2,
-    type = "integer",
-    help = "selects the N most significant signatures in samples to express mutational profiles"
-  ),
-  make_option(
-    "--nrun",
-    default = 2,
-    type = "integer",
-    help = "Number of runs to fit signatures"
-  ),
-  make_option(
-    "--rank",
-    default = 2,
-    type = "integer",
-    help = "number of ranks to display for parameter optimization"
-  ),
+    make_option(
+        "--inputs",
+        default = NA,
+        type = "character",
+        help = "json formatted dictionary of datasets and their paths"
+    ),
+    make_option(
+        "--genome",
+        default = NA,
+        type = "character",
+        help = "genome name in the BSgenome bioconductor package"
+    ),
+    make_option(
+        "--levels",
+        default = NA,
+        type = "character",
+        help = "path to the tab separated file describing the levels in function of datasets"
+    ),
+    make_option(
+        "--cosmic_version",
+        default = NA,
+        type = "character",
+        help = "Version of the Cosmic Signature set to be used to express mutational profiles"
+    ),
     make_option(
-    "--newsignum",
-    default = 2,
-    type = "integer",
-    help = "Number of new signatures to be captured"
-  ),
+        "--own_signatures",
+        default = NA,
+        type = "character",
+        help = "Path to the user-defined signature matrix"
+    ),
+    make_option(
+        "--signum",
+        default = 2,
+        type = "integer",
+        help = "selects the N most significant signatures in samples to express mutational profiles"
+    ),
+    make_option(
+        "--nrun",
+        default = 2,
+        type = "integer",
+        help = "Number of runs to fit signatures"
+    ),
+    make_option(
+        "--rank",
+        default = 2,
+        type = "integer",
+        help = "number of ranks to display for parameter optimization"
+    ),
+    make_option(
+        "--newsignum",
+        default = 2,
+        type = "integer",
+        help = "Number of new signatures to be captured"
+    ),
     make_option(
-    "--cosmic_id_threshold",
-    default = 0.85,
-    type = "double",
-    help = "minimu cosine similarity to rename a new signature according to cosmic v3.2"
-  ),
-  make_option(
-    "--output_spectrum",
-    default = NA,
-    type = "character",
-    help = "path to output dataset"
-  ),
-  make_option(
-    "--output_denovo",
-    default = NA,
-    type = "character",
-    help = "path to output dataset"
-  ),
-  make_option(
-    "--sigmatrix",
-    default = NA,
-    type = "character",
-    help = "path to signature matrix"
-  ),
-  make_option(
-    "--output_sigpattern",
-    default = NA,
-    type = "character",
-    help = "path to output dataset"
-  ),
-  make_option(
-    "--display_signatures",
-    default = NA,
-    type = "character",
-    help = "display input signature profiles if set to yes"
-  ),
-  make_option(
-    "--sig_contrib_matrix",
-    default = NA,
-    type = "character",
-    help = "path to signature contribution matrix"
-  ),
-  make_option(
-    "--colors",
-    default = NA,
-    type = "character",
-    help = "color palette to display signatures"
-  ),
-  make_option(
-    c("-r", "--rdata"),
-    type = "character",
-    default = NULL,
-    help = "Path to RData output file"
-  ),
-  make_option(
-    c("-t", "--tooldir"),
-    type = "character",
-    default = NULL,
-    help = "Path to tool directory, where tool data are stored")
-
+        "--cosmic_id_threshold",
+        default = 0.85,
+        type = "double",
+        help = "minimu cosine similarity to rename a new signature according to cosmic v3.2"
+    ),
+    make_option(
+        "--output_spectrum",
+        default = NA,
+        type = "character",
+        help = "path to output dataset"
+    ),
+    make_option(
+        "--output_denovo",
+        default = NA,
+        type = "character",
+        help = "path to output dataset"
+    ),
+    make_option(
+        "--sigmatrix",
+        default = NA,
+        type = "character",
+        help = "path to signature matrix"
+    ),
+    make_option(
+        "--output_sigpattern",
+        default = NA,
+        type = "character",
+        help = "path to output dataset"
+    ),
+    make_option(
+        "--display_signatures",
+        default = NA,
+        type = "character",
+        help = "display input signature profiles if set to yes"
+    ),
+    make_option(
+        "--sig_contrib_matrix",
+        default = NA,
+        type = "character",
+        help = "path to signature contribution matrix"
+    ),
+    make_option(
+        "--colors",
+        default = NA,
+        type = "character",
+        help = "color palette to display signatures"
+    ),
+    make_option(
+        c("-r", "--rdata"),
+        type = "character",
+        default = NULL,
+        help = "Path to RData output file"
+    ),
+    make_option(
+        c("-t", "--tooldir"),
+        type = "character",
+        default = NULL,
+        help = "Path to tool directory, where tool data are stored"
+    )
 )
 
 opt <- parse_args(OptionParser(option_list = option_list),
-                 args = commandArgs(trailingOnly = TRUE))
+    args = commandArgs(trailingOnly = TRUE)
+)
 
 ################ Manage input data ####################
 json_dict <- opt$inputs
@@ -152,12 +154,16 @@
 # Load the VCF files into a GRangesList:
 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome)
 library(plyr)
-if (!is.na(opt$levels)[1]) {  # manage levels if there are
-    levels_table  <- read.delim(opt$levels, header = FALSE,
-                                col.names = c("element_identifier", "level"))
-    } else {
-    levels_table <- data.frame(element_identifier = vcf_table$element_identifier,
-                               level = rep("nolabels", length(vcf_table$element_identifier)))
+if (!is.na(opt$levels)[1]) { # manage levels if there are
+    levels_table <- read.delim(opt$levels,
+        header = FALSE,
+        col.names = c("element_identifier", "level")
+    )
+} else {
+    levels_table <- data.frame(
+        element_identifier = vcf_table$element_identifier,
+        level = rep("nolabels", length(vcf_table$element_identifier))
+    )
 }
 metadata_table <- join(vcf_table, levels_table, by = "element_identifier")
 tissue <- as.vector(metadata_table$level)
@@ -181,7 +187,7 @@
         p2 <- plot_spectrum(type_occurrences, by = tissue, CT = TRUE) # by levels
         p3 <- plot_spectrum(type_occurrences, CT = TRUE, legend = TRUE) # total
         grid.arrange(p2, p3, ncol = 2, widths = c(4, 2.3), heights = c(4, 1))
-   }
+    }
     plot_96_profile(mut_mat, condensed = TRUE)
     dev.off()
 }
@@ -190,13 +196,12 @@
 # opt$rank cannot be higher than the number of samples and
 # likewise, opt$signum cannot be higher thant the number of samples
 if (!is.na(opt$output_denovo)[1]) {
-
     if (opt$rank > length(element_identifiers)) {
         opt$rank <- length(element_identifiers)
-        }
+    }
     if (opt$signum > length(element_identifiers)) {
         opt$signum <- length(element_identifiers)
-        }
+    }
     pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix
     # Use the NMF package to generate an estimate rank plot
     library("NMF")
@@ -221,9 +226,11 @@
     # write matrix of deno signatures for user
     new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq")
     new_sig_matrix <- format(new_sig_matrix, scientific = TRUE)
-    newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE),
-                     "[", new_sig_matrix$substitution, "]",
-                     gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE))
+    newcol <- paste0(
+        gsub("\\..", "", new_sig_matrix$context, perl = TRUE),
+        "[", new_sig_matrix$substitution, "]",
+        gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)
+    )
     new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]])
     write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t")
     # Visualize the contribution of the signatures in a barplot
@@ -241,14 +248,14 @@
     pch1 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = TRUE)
     # Plot signature contribution as a heatmap without sample clustering:
     pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE)
-    #Combine the plots into one figure:
+    # Combine the plots into one figure:
     grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6))
 
     # Compare the reconstructed mutational profile with the original mutational profile:
     pch3 <- plot_original_vs_reconstructed(pseudo_mut_mat, nmf_res$reconstructed, y_intercept = 0.95)
     grid.arrange(pch3)
     dev.off()
-    }
+}
 
 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures ####
 
@@ -257,9 +264,10 @@
     if (!is.na(opt$cosmic_version)) {
         cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE)
         cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome &
-                                           cosmic_urls$cosmic_version == opt$cosmic_version]
+                                               cosmic_urls$cosmic_version == opt$cosmic_version]
         sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file),
-                                            sep = "\t", header = TRUE)
+            sep = "\t", header = TRUE
+        )
         tag <- paste(gsub("BSgenome.Hsapiens.UCSC.", "", opt$genome), "COSMIC", opt$cosmic_version, sep = " ")
     }
     # Prepare user-defined signatures
@@ -274,38 +282,44 @@
     sbs_signatures <- sbs_signatures[match(row.names(mut_mat), row.names(sbs_signatures)), ]
     # arrange signature colors
     if (opt$colors == "intense") {
-        signature_colors <- c("#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#c4bedf", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3",
-                              "#d2ab00", "#e36eff", "#cad5b3", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083",
-                              "#c0ce67", "#002981", "#e6b8b3", "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec",
-                              "#ff233f", "#01b7b4", "#fd005c", "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff",
-                              "#093f00", "#ff6494", "#009791", "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956",
-                              "#f0be6d", "#6a0058", "#957a00", "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e",
-                              "#b36b00", "#debaeb", "#605400", "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500",
-                              "#6e0028", "#717653", "#6c1000", "#693600")
+        signature_colors <- c(
+            "#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#c4bedf", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3",
+            "#d2ab00", "#e36eff", "#cad5b3", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083",
+            "#c0ce67", "#002981", "#e6b8b3", "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec",
+            "#ff233f", "#01b7b4", "#fd005c", "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff",
+            "#093f00", "#ff6494", "#009791", "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956",
+            "#f0be6d", "#6a0058", "#957a00", "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e",
+            "#b36b00", "#debaeb", "#605400", "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500",
+            "#6e0028", "#717653", "#6c1000", "#693600"
+        )
     } else {
-        signature_colors <- c("#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0", "#F0027F", "#c4bedf", "#BF5B17", "#666666", "#1B9E77", "#D95F02",
-                              "#7570B3", "#E7298A", "#cad5b3", "#66A61E", "#E6AB02", "#A6761D", "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99",
-                              "#E31A1C", "#B3E2CD", "#e6b8b3", "#FF7F00", "#CAB2D6", "#6A3D9A", "#B15928", "#FBB4AE", "#B3CDE3", "#CCEBC5", "#DECBE4",
-                              "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC", "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4", "#E6F5C9", "#FFF2AE",
-                              "#F1E2CC", "#CCCCCC", "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FFFF33", "#A65628", "#F781BF", "#999999", "#66C2A5",
-                              "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072",
-                              "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#FFED6F", "#3f1d02", "#ff958f", "#634a66", "#775500",
-                              "#6e0028", "#717653", "#6c1000", "#693600")
+        signature_colors <- c(
+            "#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0", "#F0027F", "#c4bedf", "#BF5B17", "#666666", "#1B9E77", "#D95F02",
+            "#7570B3", "#E7298A", "#cad5b3", "#66A61E", "#E6AB02", "#A6761D", "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99",
+            "#E31A1C", "#B3E2CD", "#e6b8b3", "#FF7F00", "#CAB2D6", "#6A3D9A", "#B15928", "#FBB4AE", "#B3CDE3", "#CCEBC5", "#DECBE4",
+            "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC", "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4", "#E6F5C9", "#FFF2AE",
+            "#F1E2CC", "#CCCCCC", "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FFFF33", "#A65628", "#F781BF", "#999999", "#66C2A5",
+            "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072",
+            "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#FFED6F", "#3f1d02", "#ff958f", "#634a66", "#775500",
+            "#6e0028", "#717653", "#6c1000", "#693600"
+        )
     }
-    names(signature_colors) <- c("SBS1", "SBS2", "SBS3", "SBS4", "SBS5", "SBS6", "SBS7", "SBS7a", "SBS7b", "SBS7c", "SBS7d",
-                                 "SBS8", "SBS9", "SBS10", "SBS10a", "SBS10b", "SBS10c", "SBS10d", "SBS11", "SBS12", "SBS13", "SBS14",
-                                 "SBS15", "SBS16", "SBS17", "SBS17a", "SBS17b", "SBS18", "SBS19", "SBS20", "SBS21", "SBS22", "SBS23",
-                                 "SBS24", "SBS25", "SBS26", "SBS27", "SBS28", "SBS29", "SBS30", "SBS31", "SBS32", "SBS33", "SBS34",
-                                 "SBS35", "SBS36", "SBS37", "SBS38", "SBS39", "SBS40", "SBS41", "SBS42", "SBS43", "SBS44", "SBS45",
-                                 "SBS46", "SBS47", "SBS48", "SBS49", "SBS50", "SBS51", "SBS52", "SBS53", "SBS54", "SBS55", "SBS56",
-                                 "SBS57", "SBS58", "SBS59", "SBS60", "SBS84", "SBS85", "SBS86", "SBS87", "SBS88", "SBS89", "SBS90",
-                                 "SBS91", "SBS92", "SBS93", "SBS94")
+    names(signature_colors) <- c(
+        "SBS1", "SBS2", "SBS3", "SBS4", "SBS5", "SBS6", "SBS7", "SBS7a", "SBS7b", "SBS7c", "SBS7d",
+        "SBS8", "SBS9", "SBS10", "SBS10a", "SBS10b", "SBS10c", "SBS10d", "SBS11", "SBS12", "SBS13", "SBS14",
+        "SBS15", "SBS16", "SBS17", "SBS17a", "SBS17b", "SBS18", "SBS19", "SBS20", "SBS21", "SBS22", "SBS23",
+        "SBS24", "SBS25", "SBS26", "SBS27", "SBS28", "SBS29", "SBS30", "SBS31", "SBS32", "SBS33", "SBS34",
+        "SBS35", "SBS36", "SBS37", "SBS38", "SBS39", "SBS40", "SBS41", "SBS42", "SBS43", "SBS44", "SBS45",
+        "SBS46", "SBS47", "SBS48", "SBS49", "SBS50", "SBS51", "SBS52", "SBS53", "SBS54", "SBS55", "SBS56",
+        "SBS57", "SBS58", "SBS59", "SBS60", "SBS84", "SBS85", "SBS86", "SBS87", "SBS88", "SBS89", "SBS90",
+        "SBS91", "SBS92", "SBS93", "SBS94"
+    )
 
     # if signature names provided are not compliant with cosmic nomenclature,
     # we attribute these names to the active signature_colors vector, adjusted to the length
     # of the signature names.
 
-    if (! all(colnames(sbs_signatures) %in% names(signature_colors))) { # provided signature are not all included in cosmic names
+    if (!all(colnames(sbs_signatures) %in% names(signature_colors))) { # provided signature are not all included in cosmic names
         signature_colors <- signature_colors[seq_along(sbs_signatures)]
         names(signature_colors) <- colnames(sbs_signatures)
     }
@@ -320,15 +334,24 @@
     if (opt$display_signatures == "yes") {
         for (i in head(seq(1, ncol(sbs_signatures), by = 20), -1)) {
             p6 <- plot_96_profile(sbs_signatures[, i:(i + 19)], condensed = TRUE, ymax = 0.3)
-            grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc((i + 1) / 20) + 1, " of ",
-                                                   trunc(ncol(sbs_signatures) / 20) + 1, " pages)"),
-                                            gp = gpar(fontsize = 12, font = 3)))
+            grid.arrange(p6, top = textGrob(
+                paste0(
+                    tag, " profiles (", trunc((i + 1) / 20) + 1, " of ",
+                    trunc(ncol(sbs_signatures) / 20) + 1, " pages)"
+                ),
+                gp = gpar(fontsize = 12, font = 3)
+            ))
         }
         p6 <- plot_96_profile(sbs_signatures[, (trunc(ncol(sbs_signatures) / 20) * 20):(ncol(sbs_signatures))],
-                              condensed = TRUE, ymax = 0.3)
-        grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ",
-                                               trunc(ncol(sbs_signatures) / 20) + 1, " pages)"),
-                                        gp = gpar(fontsize = 12, font = 3)))
+            condensed = TRUE, ymax = 0.3
+        )
+        grid.arrange(p6, top = textGrob(
+            paste0(
+                tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ",
+                trunc(ncol(sbs_signatures) / 20) + 1, " pages)"
+            ),
+            gp = gpar(fontsize = 12, font = 3)
+        ))
     }
 
 
@@ -339,72 +362,88 @@
     # Plot contribution barplots
     pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute")
     pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative")
-    if (is.na(opt$levels)[1]) {  # if there are NO levels to display in graphs
+    if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs
         pc3_data <- pc3$data
         pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
-               geom_bar(stat = "identity", position = "stack") +
-               coord_flip() +
-               scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
-               labs(x = "Samples", y = "Absolute contribution") + theme_bw() +
-               theme(panel.grid.minor.x = element_blank(),
-                     panel.grid.major.x = element_blank(),
-                     legend.position = "right",
-                     text = element_text(size = 8),
-                     axis.text.x = element_text(angle = 90, hjust = 1))
+            geom_bar(stat = "identity", position = "stack") +
+            coord_flip() +
+            scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
+            labs(x = "Samples", y = "Absolute contribution") +
+            theme_bw() +
+            theme(
+                panel.grid.minor.x = element_blank(),
+                panel.grid.major.x = element_blank(),
+                legend.position = "right",
+                text = element_text(size = 8),
+                axis.text.x = element_text(angle = 90, hjust = 1)
+            )
         pc4_data <- pc4$data
         pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
-               geom_bar(stat = "identity", position = "fill") +
-               coord_flip() +
-               scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
-               scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
-               labs(x = "Samples", y = "Relative contribution") + theme_bw() +
-               theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right",
-                     text = element_text(size = 8),
-                     axis.text.x = element_text(angle = 90, hjust = 1))
+            geom_bar(stat = "identity", position = "fill") +
+            coord_flip() +
+            scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
+            scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
+            labs(x = "Samples", y = "Relative contribution") +
+            theme_bw() +
+            theme(
+                panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right",
+                text = element_text(size = 8),
+                axis.text.x = element_text(angle = 90, hjust = 1)
+            )
     }
     #####
     # ggplot2 alternative
-    if (!is.na(opt$levels)[1]) {  # if there are levels to display in graphs
+    if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs
         pc3_data <- pc3$data
         pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier")
         pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
-               geom_bar(stat = "identity", position = "stack") +
-               scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
-               labs(x = "Samples", y = "Absolute contribution") + theme_bw() +
-               theme(panel.grid.minor.x = element_blank(),
-                     panel.grid.major.x = element_blank(),
-                     legend.position = "right",
-                     text = element_text(size = 8),
-                     axis.text.x = element_text(angle = 90, hjust = 1)) +
-               facet_grid(~level, scales = "free_x", space = "free")
+            geom_bar(stat = "identity", position = "stack") +
+            scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
+            labs(x = "Samples", y = "Absolute contribution") +
+            theme_bw() +
+            theme(
+                panel.grid.minor.x = element_blank(),
+                panel.grid.major.x = element_blank(),
+                legend.position = "right",
+                text = element_text(size = 8),
+                axis.text.x = element_text(angle = 90, hjust = 1)
+            ) +
+            facet_grid(~level, scales = "free_x", space = "free")
         pc4_data <- pc4$data
         pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier")
         pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
-               geom_bar(stat = "identity", position = "fill") +
-               scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
-               scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
-               labs(x = "Samples", y = "Relative contribution") + theme_bw() +
-               theme(panel.grid.minor.x = element_blank(),
-                     panel.grid.major.x = element_blank(),
-                     legend.position = "right",
-                     text = element_text(size = 8),
-                     axis.text.x = element_text(angle = 90, hjust = 1)) +
-               facet_grid(~level, scales = "free_x", space = "free")
+            geom_bar(stat = "identity", position = "fill") +
+            scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
+            scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
+            labs(x = "Samples", y = "Relative contribution") +
+            theme_bw() +
+            theme(
+                panel.grid.minor.x = element_blank(),
+                panel.grid.major.x = element_blank(),
+                legend.position = "right",
+                text = element_text(size = 8),
+                axis.text.x = element_text(angle = 90, hjust = 1)
+            ) +
+            facet_grid(~level, scales = "free_x", space = "free")
     }
     # Combine the two plots:
     grid.arrange(pc3, pc4,
-                 top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles",
-                 gp = gpar(fontsize = 12, font = 3)))
+        top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles",
+            gp = gpar(fontsize = 12, font = 3)
+        )
+    )
 
     #### pie charts of comic signatures contributions in samples ###
     library(reshape2)
     library(dplyr)
     if (length(levels(factor(levels_table$level))) < 2) {
         fit_res_contrib <- as.data.frame(fit_res$contribution)
-        worklist <- cbind(signature = rownames(fit_res$contribution),
-                          level = rep("nolabels", length(fit_res_contrib[, 1])),
-                          fit_res_contrib,
-                          sum = rowSums(fit_res_contrib))
+        worklist <- cbind(
+            signature = rownames(fit_res$contribution),
+            level = rep("nolabels", length(fit_res_contrib[, 1])),
+            fit_res_contrib,
+            sum = rowSums(fit_res_contrib)
+        )
         worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ]
         worklist <- worklist[1:opt$signum, ]
         worklist <- worklist[, -length(worklist[1, ])]
@@ -413,12 +452,12 @@
     } else {
         worklist <- list()
         for (i in levels(factor(levels_table$level))) {
-             worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]]
-             sum <- rowSums(as.data.frame(worklist[[i]]))
-             worklist[[i]] <- cbind(worklist[[i]], sum)
-             worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ]
-             worklist[[i]] <- worklist[[i]][1:opt$signum, ]
-             worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))]
+            worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]]
+            sum <- rowSums(as.data.frame(worklist[[i]]))
+            worklist[[i]] <- cbind(worklist[[i]], sum)
+            worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ]
+            worklist[[i]] <- worklist[[i]][1:opt$signum, ]
+            worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))]
         }
         worklist <- as.data.frame(melt(worklist))
         worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2])
@@ -429,17 +468,22 @@
     worklist$pos <- cumsum(worklist$value) - worklist$value / 2
     worklist$label <- factor(gsub("SBS", "", worklist$signature))
     worklist$signature <- factor(worklist$signature)
-    p7 <-  ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) +
-              geom_bar(width = 1, stat = "identity") +
-              geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) +
-              coord_polar("y", start = 0) + facet_wrap(. ~ sample) +
-              labs(x = "", y = "Samples", fill = tag) +
-              scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"),
-                                values = signature_colors[levels(worklist$signature)],
-                                labels = names(signature_colors[levels(worklist$signature)])) +
-              theme(axis.text = element_blank(),
-                    axis.ticks = element_blank(),
-                    panel.grid  = element_blank())
+    p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) +
+        geom_bar(width = 1, stat = "identity") +
+        geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) +
+        coord_polar("y", start = 0) +
+        facet_wrap(. ~ sample) +
+        labs(x = "", y = "Samples", fill = tag) +
+        scale_fill_manual(
+            name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"),
+            values = signature_colors[levels(worklist$signature)],
+            labels = names(signature_colors[levels(worklist$signature)])
+        ) +
+        theme(
+            axis.text = element_blank(),
+            axis.ticks = element_blank(),
+            panel.grid = element_blank()
+        )
     grid.arrange(p7)
 
     # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering
@@ -452,11 +496,15 @@
     if (!is.na(opt$sig_contrib_matrix)) {
         output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution))
         if (length(levels(factor(levels_table$level))) > 1) {
-            output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution),
-                                                                    3], "-", colnames(fit_res$contribution)),
-                                       output_table)
+            output_table <- data.frame(
+                sample = paste0(metadata_table[
+                    metadata_table$element_identifier == colnames(fit_res$contribution),
+                    3
+                ], "-", colnames(fit_res$contribution)),
+                output_table
+            )
             colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
-            } else {
+        } else {
             output_table <- data.frame(sample = rownames(output_table), output_table)
             colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
         }
@@ -480,20 +528,21 @@
     cos_sim_ori_rec$sample <- row.names(cos_sim_ori_rec)
     # Make barplot
     p9 <- ggplot(cos_sim_ori_rec, aes(y = cos_sim, x = sample)) +
-                      geom_bar(stat = "identity", fill = "skyblue4") +
-                      coord_cartesian(ylim = c(0.8, 1)) +
-                      # coord_flip(ylim=c(0.8,1)) +
-                      ylab("Cosine similarity\n original VS reconstructed") +
-                      xlab("") +
-                      # Reverse order of the samples such that first is up
-                      # xlim(rev(levels(factor(cos_sim_ori_rec$sample)))) +
-                      theme_bw() +
-                      theme(panel.grid.minor.y = element_blank(),
-                            panel.grid.major.y = element_blank(),
-                            axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
-                            ) +
-                      # Add cut.off line
-                      geom_hline(aes(yintercept = .95))
+        geom_bar(stat = "identity", fill = "skyblue4") +
+        coord_cartesian(ylim = c(0.8, 1)) +
+        # coord_flip(ylim=c(0.8,1)) +
+        ylab("Cosine similarity\n original VS reconstructed") +
+        xlab("") +
+        # Reverse order of the samples such that first is up
+        # xlim(rev(levels(factor(cos_sim_ori_rec$sample)))) +
+        theme_bw() +
+        theme(
+            panel.grid.minor.y = element_blank(),
+            panel.grid.major.y = element_blank(),
+            axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
+        ) +
+        # Add cut.off line
+        geom_hline(aes(yintercept = .95))
     grid.arrange(p9, top = textGrob("Similarity between true profiles and profiles reconstructed with elementary signatures", gp = gpar(fontsize = 12, font = 3)))
     dev.off()
 }
@@ -501,5 +550,5 @@
 
 # Output RData file
 if (!is.null(opt$rdata)) {
-  save.image(file = opt$rdata)
+    save.image(file = opt$rdata)
 }
--- a/mutational_patterns.xml	Sat Feb 10 17:45:11 2024 +0000
+++ b/mutational_patterns.xml	Sun Feb 11 01:12:41 2024 +0000
@@ -2,7 +2,7 @@
     <description>from genomic variations in vcf files</description>
     <macros>
         <token name="@TOOL_VERSION@">3.12.0</token>
-        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@VERSION_SUFFIX@">1</token>
         <token name="@PROFILE@">23.0</token>
     </macros>
     <requirements>