comparison mutational_patterns.R @ 28:9a33a5a90a2c draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/mutational_patterns commit c55e91bc6716a71ee3a3f30da3f4c915f465c1d4
author artbio
date Sun, 11 Feb 2024 01:12:41 +0000
parents af5c65ad5317
children
comparison
equal deleted inserted replaced
27:2c9759c7ba72 28:9a33a5a90a2c
1 # load packages that are provided in the conda env 1 # load packages that are provided in the conda env
2 options(show.error.messages = FALSE, 2 options(
3 error = function() { 3 show.error.messages = FALSE,
4 cat(geterrmessage(), file = stderr()) 4 error = function() {
5 q("no", 1, FALSE) 5 cat(geterrmessage(), file = stderr())
6 } 6 q("no", 1, FALSE)
7 ) 7 }
8 )
8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 9 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
9 warnings() 10 warnings()
10 library(optparse) 11 library(optparse)
11 library(rjson) 12 library(rjson)
12 library(grid) 13 library(grid)
14 library(scales) 15 library(scales)
15 library(RColorBrewer) 16 library(RColorBrewer)
16 17
17 # Arguments 18 # Arguments
18 option_list <- list( 19 option_list <- list(
19 make_option( 20 make_option(
20 "--inputs", 21 "--inputs",
21 default = NA, 22 default = NA,
22 type = "character", 23 type = "character",
23 help = "json formatted dictionary of datasets and their paths" 24 help = "json formatted dictionary of datasets and their paths"
24 ), 25 ),
25 make_option( 26 make_option(
26 "--genome", 27 "--genome",
27 default = NA, 28 default = NA,
28 type = "character", 29 type = "character",
29 help = "genome name in the BSgenome bioconductor package" 30 help = "genome name in the BSgenome bioconductor package"
30 ), 31 ),
31 make_option( 32 make_option(
32 "--levels", 33 "--levels",
33 default = NA, 34 default = NA,
34 type = "character", 35 type = "character",
35 help = "path to the tab separated file describing the levels in function of datasets" 36 help = "path to the tab separated file describing the levels in function of datasets"
36 ), 37 ),
37 make_option( 38 make_option(
38 "--cosmic_version", 39 "--cosmic_version",
39 default = NA, 40 default = NA,
40 type = "character", 41 type = "character",
41 help = "Version of the Cosmic Signature set to be used to express mutational profiles" 42 help = "Version of the Cosmic Signature set to be used to express mutational profiles"
42 ), 43 ),
43 make_option( 44 make_option(
44 "--own_signatures", 45 "--own_signatures",
45 default = NA, 46 default = NA,
46 type = "character", 47 type = "character",
47 help = "Path to the user-defined signature matrix" 48 help = "Path to the user-defined signature matrix"
48 ), 49 ),
49 make_option( 50 make_option(
50 "--signum", 51 "--signum",
51 default = 2, 52 default = 2,
52 type = "integer", 53 type = "integer",
53 help = "selects the N most significant signatures in samples to express mutational profiles" 54 help = "selects the N most significant signatures in samples to express mutational profiles"
54 ), 55 ),
55 make_option( 56 make_option(
56 "--nrun", 57 "--nrun",
57 default = 2, 58 default = 2,
58 type = "integer", 59 type = "integer",
59 help = "Number of runs to fit signatures" 60 help = "Number of runs to fit signatures"
60 ), 61 ),
61 make_option( 62 make_option(
62 "--rank", 63 "--rank",
63 default = 2, 64 default = 2,
64 type = "integer", 65 type = "integer",
65 help = "number of ranks to display for parameter optimization" 66 help = "number of ranks to display for parameter optimization"
66 ), 67 ),
67 make_option( 68 make_option(
68 "--newsignum", 69 "--newsignum",
69 default = 2, 70 default = 2,
70 type = "integer", 71 type = "integer",
71 help = "Number of new signatures to be captured" 72 help = "Number of new signatures to be captured"
72 ), 73 ),
73 make_option( 74 make_option(
74 "--cosmic_id_threshold", 75 "--cosmic_id_threshold",
75 default = 0.85, 76 default = 0.85,
76 type = "double", 77 type = "double",
77 help = "minimu cosine similarity to rename a new signature according to cosmic v3.2" 78 help = "minimu cosine similarity to rename a new signature according to cosmic v3.2"
78 ), 79 ),
79 make_option( 80 make_option(
80 "--output_spectrum", 81 "--output_spectrum",
81 default = NA, 82 default = NA,
82 type = "character", 83 type = "character",
83 help = "path to output dataset" 84 help = "path to output dataset"
84 ), 85 ),
85 make_option( 86 make_option(
86 "--output_denovo", 87 "--output_denovo",
87 default = NA, 88 default = NA,
88 type = "character", 89 type = "character",
89 help = "path to output dataset" 90 help = "path to output dataset"
90 ), 91 ),
91 make_option( 92 make_option(
92 "--sigmatrix", 93 "--sigmatrix",
93 default = NA, 94 default = NA,
94 type = "character", 95 type = "character",
95 help = "path to signature matrix" 96 help = "path to signature matrix"
96 ), 97 ),
97 make_option( 98 make_option(
98 "--output_sigpattern", 99 "--output_sigpattern",
99 default = NA, 100 default = NA,
100 type = "character", 101 type = "character",
101 help = "path to output dataset" 102 help = "path to output dataset"
102 ), 103 ),
103 make_option( 104 make_option(
104 "--display_signatures", 105 "--display_signatures",
105 default = NA, 106 default = NA,
106 type = "character", 107 type = "character",
107 help = "display input signature profiles if set to yes" 108 help = "display input signature profiles if set to yes"
108 ), 109 ),
109 make_option( 110 make_option(
110 "--sig_contrib_matrix", 111 "--sig_contrib_matrix",
111 default = NA, 112 default = NA,
112 type = "character", 113 type = "character",
113 help = "path to signature contribution matrix" 114 help = "path to signature contribution matrix"
114 ), 115 ),
115 make_option( 116 make_option(
116 "--colors", 117 "--colors",
117 default = NA, 118 default = NA,
118 type = "character", 119 type = "character",
119 help = "color palette to display signatures" 120 help = "color palette to display signatures"
120 ), 121 ),
121 make_option( 122 make_option(
122 c("-r", "--rdata"), 123 c("-r", "--rdata"),
123 type = "character", 124 type = "character",
124 default = NULL, 125 default = NULL,
125 help = "Path to RData output file" 126 help = "Path to RData output file"
126 ), 127 ),
127 make_option( 128 make_option(
128 c("-t", "--tooldir"), 129 c("-t", "--tooldir"),
129 type = "character", 130 type = "character",
130 default = NULL, 131 default = NULL,
131 help = "Path to tool directory, where tool data are stored") 132 help = "Path to tool directory, where tool data are stored"
132 133 )
133 ) 134 )
134 135
135 opt <- parse_args(OptionParser(option_list = option_list), 136 opt <- parse_args(OptionParser(option_list = option_list),
136 args = commandArgs(trailingOnly = TRUE)) 137 args = commandArgs(trailingOnly = TRUE)
138 )
137 139
138 ################ Manage input data #################### 140 ################ Manage input data ####################
139 json_dict <- opt$inputs 141 json_dict <- opt$inputs
140 parser <- newJSONParser() 142 parser <- newJSONParser()
141 parser$addData(json_dict) 143 parser$addData(json_dict)
150 library(ggplot2) 152 library(ggplot2)
151 153
152 # Load the VCF files into a GRangesList: 154 # Load the VCF files into a GRangesList:
153 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) 155 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome)
154 library(plyr) 156 library(plyr)
155 if (!is.na(opt$levels)[1]) { # manage levels if there are 157 if (!is.na(opt$levels)[1]) { # manage levels if there are
156 levels_table <- read.delim(opt$levels, header = FALSE, 158 levels_table <- read.delim(opt$levels,
157 col.names = c("element_identifier", "level")) 159 header = FALSE,
158 } else { 160 col.names = c("element_identifier", "level")
159 levels_table <- data.frame(element_identifier = vcf_table$element_identifier, 161 )
160 level = rep("nolabels", length(vcf_table$element_identifier))) 162 } else {
163 levels_table <- data.frame(
164 element_identifier = vcf_table$element_identifier,
165 level = rep("nolabels", length(vcf_table$element_identifier))
166 )
161 } 167 }
162 metadata_table <- join(vcf_table, levels_table, by = "element_identifier") 168 metadata_table <- join(vcf_table, levels_table, by = "element_identifier")
163 tissue <- as.vector(metadata_table$level) 169 tissue <- as.vector(metadata_table$level)
164 detach(package:plyr) 170 detach(package:plyr)
165 171
179 plot(p1) 185 plot(p1)
180 } else { 186 } else {
181 p2 <- plot_spectrum(type_occurrences, by = tissue, CT = TRUE) # by levels 187 p2 <- plot_spectrum(type_occurrences, by = tissue, CT = TRUE) # by levels
182 p3 <- plot_spectrum(type_occurrences, CT = TRUE, legend = TRUE) # total 188 p3 <- plot_spectrum(type_occurrences, CT = TRUE, legend = TRUE) # total
183 grid.arrange(p2, p3, ncol = 2, widths = c(4, 2.3), heights = c(4, 1)) 189 grid.arrange(p2, p3, ncol = 2, widths = c(4, 2.3), heights = c(4, 1))
184 } 190 }
185 plot_96_profile(mut_mat, condensed = TRUE) 191 plot_96_profile(mut_mat, condensed = TRUE)
186 dev.off() 192 dev.off()
187 } 193 }
188 194
189 ###### Section 2: De novo mutational signature extraction using NMF ####### 195 ###### Section 2: De novo mutational signature extraction using NMF #######
190 # opt$rank cannot be higher than the number of samples and 196 # opt$rank cannot be higher than the number of samples and
191 # likewise, opt$signum cannot be higher thant the number of samples 197 # likewise, opt$signum cannot be higher thant the number of samples
192 if (!is.na(opt$output_denovo)[1]) { 198 if (!is.na(opt$output_denovo)[1]) {
193
194 if (opt$rank > length(element_identifiers)) { 199 if (opt$rank > length(element_identifiers)) {
195 opt$rank <- length(element_identifiers) 200 opt$rank <- length(element_identifiers)
196 } 201 }
197 if (opt$signum > length(element_identifiers)) { 202 if (opt$signum > length(element_identifiers)) {
198 opt$signum <- length(element_identifiers) 203 opt$signum <- length(element_identifiers)
199 } 204 }
200 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix 205 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix
201 # Use the NMF package to generate an estimate rank plot 206 # Use the NMF package to generate an estimate rank plot
202 library("NMF") 207 library("NMF")
203 estimate <- nmf(pseudo_mut_mat, rank = 1:opt$rank, method = "brunet", nrun = opt$nrun, seed = 123456) 208 estimate <- nmf(pseudo_mut_mat, rank = 1:opt$rank, method = "brunet", nrun = opt$nrun, seed = 123456)
204 # And plot it 209 # And plot it
219 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) 224 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE)
220 grid.arrange(p5) 225 grid.arrange(p5)
221 # write matrix of deno signatures for user 226 # write matrix of deno signatures for user
222 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") 227 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq")
223 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) 228 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE)
224 newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE), 229 newcol <- paste0(
225 "[", new_sig_matrix$substitution, "]", 230 gsub("\\..", "", new_sig_matrix$context, perl = TRUE),
226 gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)) 231 "[", new_sig_matrix$substitution, "]",
232 gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)
233 )
227 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) 234 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]])
228 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") 235 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t")
229 # Visualize the contribution of the signatures in a barplot 236 # Visualize the contribution of the signatures in a barplot
230 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE) 237 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE)
231 # Visualize the contribution of the signatures in absolute number of mutations 238 # Visualize the contribution of the signatures in absolute number of mutations
239 # can be plotted in a user-specified order. 246 # can be plotted in a user-specified order.
240 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: 247 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order:
241 pch1 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = TRUE) 248 pch1 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = TRUE)
242 # Plot signature contribution as a heatmap without sample clustering: 249 # Plot signature contribution as a heatmap without sample clustering:
243 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE) 250 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE)
244 #Combine the plots into one figure: 251 # Combine the plots into one figure:
245 grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6)) 252 grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6))
246 253
247 # Compare the reconstructed mutational profile with the original mutational profile: 254 # Compare the reconstructed mutational profile with the original mutational profile:
248 pch3 <- plot_original_vs_reconstructed(pseudo_mut_mat, nmf_res$reconstructed, y_intercept = 0.95) 255 pch3 <- plot_original_vs_reconstructed(pseudo_mut_mat, nmf_res$reconstructed, y_intercept = 0.95)
249 grid.arrange(pch3) 256 grid.arrange(pch3)
250 dev.off() 257 dev.off()
251 } 258 }
252 259
253 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures #### 260 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures ####
254 261
255 if (!is.na(opt$output_sigpattern)[1]) { 262 if (!is.na(opt$output_sigpattern)[1]) {
256 # Prepare cosmic signatures 263 # Prepare cosmic signatures
257 if (!is.na(opt$cosmic_version)) { 264 if (!is.na(opt$cosmic_version)) {
258 cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE) 265 cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE)
259 cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome & 266 cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome &
260 cosmic_urls$cosmic_version == opt$cosmic_version] 267 cosmic_urls$cosmic_version == opt$cosmic_version]
261 sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file), 268 sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file),
262 sep = "\t", header = TRUE) 269 sep = "\t", header = TRUE
270 )
263 tag <- paste(gsub("BSgenome.Hsapiens.UCSC.", "", opt$genome), "COSMIC", opt$cosmic_version, sep = " ") 271 tag <- paste(gsub("BSgenome.Hsapiens.UCSC.", "", opt$genome), "COSMIC", opt$cosmic_version, sep = " ")
264 } 272 }
265 # Prepare user-defined signatures 273 # Prepare user-defined signatures
266 if (!is.na(opt$own_signatures)) { 274 if (!is.na(opt$own_signatures)) {
267 sbs_signatures <- read.table(opt$own_signatures, sep = "\t", header = TRUE) 275 sbs_signatures <- read.table(opt$own_signatures, sep = "\t", header = TRUE)
272 sbs_signatures <- subset(sbs_signatures, select = -c(Type)) 280 sbs_signatures <- subset(sbs_signatures, select = -c(Type))
273 # reorder substitutions of sbs_signatures to match mut_mat 281 # reorder substitutions of sbs_signatures to match mut_mat
274 sbs_signatures <- sbs_signatures[match(row.names(mut_mat), row.names(sbs_signatures)), ] 282 sbs_signatures <- sbs_signatures[match(row.names(mut_mat), row.names(sbs_signatures)), ]
275 # arrange signature colors 283 # arrange signature colors
276 if (opt$colors == "intense") { 284 if (opt$colors == "intense") {
277 signature_colors <- c("#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#c4bedf", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3", 285 signature_colors <- c(
278 "#d2ab00", "#e36eff", "#cad5b3", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083", 286 "#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#c4bedf", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3",
279 "#c0ce67", "#002981", "#e6b8b3", "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec", 287 "#d2ab00", "#e36eff", "#cad5b3", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083",
280 "#ff233f", "#01b7b4", "#fd005c", "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff", 288 "#c0ce67", "#002981", "#e6b8b3", "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec",
281 "#093f00", "#ff6494", "#009791", "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956", 289 "#ff233f", "#01b7b4", "#fd005c", "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff",
282 "#f0be6d", "#6a0058", "#957a00", "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e", 290 "#093f00", "#ff6494", "#009791", "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956",
283 "#b36b00", "#debaeb", "#605400", "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500", 291 "#f0be6d", "#6a0058", "#957a00", "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e",
284 "#6e0028", "#717653", "#6c1000", "#693600") 292 "#b36b00", "#debaeb", "#605400", "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500",
293 "#6e0028", "#717653", "#6c1000", "#693600"
294 )
285 } else { 295 } else {
286 signature_colors <- c("#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0", "#F0027F", "#c4bedf", "#BF5B17", "#666666", "#1B9E77", "#D95F02", 296 signature_colors <- c(
287 "#7570B3", "#E7298A", "#cad5b3", "#66A61E", "#E6AB02", "#A6761D", "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", 297 "#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0", "#F0027F", "#c4bedf", "#BF5B17", "#666666", "#1B9E77", "#D95F02",
288 "#E31A1C", "#B3E2CD", "#e6b8b3", "#FF7F00", "#CAB2D6", "#6A3D9A", "#B15928", "#FBB4AE", "#B3CDE3", "#CCEBC5", "#DECBE4", 298 "#7570B3", "#E7298A", "#cad5b3", "#66A61E", "#E6AB02", "#A6761D", "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99",
289 "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC", "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4", "#E6F5C9", "#FFF2AE", 299 "#E31A1C", "#B3E2CD", "#e6b8b3", "#FF7F00", "#CAB2D6", "#6A3D9A", "#B15928", "#FBB4AE", "#B3CDE3", "#CCEBC5", "#DECBE4",
290 "#F1E2CC", "#CCCCCC", "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FFFF33", "#A65628", "#F781BF", "#999999", "#66C2A5", 300 "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC", "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4", "#E6F5C9", "#FFF2AE",
291 "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", 301 "#F1E2CC", "#CCCCCC", "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FFFF33", "#A65628", "#F781BF", "#999999", "#66C2A5",
292 "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#FFED6F", "#3f1d02", "#ff958f", "#634a66", "#775500", 302 "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072",
293 "#6e0028", "#717653", "#6c1000", "#693600") 303 "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#FFED6F", "#3f1d02", "#ff958f", "#634a66", "#775500",
294 } 304 "#6e0028", "#717653", "#6c1000", "#693600"
295 names(signature_colors) <- c("SBS1", "SBS2", "SBS3", "SBS4", "SBS5", "SBS6", "SBS7", "SBS7a", "SBS7b", "SBS7c", "SBS7d", 305 )
296 "SBS8", "SBS9", "SBS10", "SBS10a", "SBS10b", "SBS10c", "SBS10d", "SBS11", "SBS12", "SBS13", "SBS14", 306 }
297 "SBS15", "SBS16", "SBS17", "SBS17a", "SBS17b", "SBS18", "SBS19", "SBS20", "SBS21", "SBS22", "SBS23", 307 names(signature_colors) <- c(
298 "SBS24", "SBS25", "SBS26", "SBS27", "SBS28", "SBS29", "SBS30", "SBS31", "SBS32", "SBS33", "SBS34", 308 "SBS1", "SBS2", "SBS3", "SBS4", "SBS5", "SBS6", "SBS7", "SBS7a", "SBS7b", "SBS7c", "SBS7d",
299 "SBS35", "SBS36", "SBS37", "SBS38", "SBS39", "SBS40", "SBS41", "SBS42", "SBS43", "SBS44", "SBS45", 309 "SBS8", "SBS9", "SBS10", "SBS10a", "SBS10b", "SBS10c", "SBS10d", "SBS11", "SBS12", "SBS13", "SBS14",
300 "SBS46", "SBS47", "SBS48", "SBS49", "SBS50", "SBS51", "SBS52", "SBS53", "SBS54", "SBS55", "SBS56", 310 "SBS15", "SBS16", "SBS17", "SBS17a", "SBS17b", "SBS18", "SBS19", "SBS20", "SBS21", "SBS22", "SBS23",
301 "SBS57", "SBS58", "SBS59", "SBS60", "SBS84", "SBS85", "SBS86", "SBS87", "SBS88", "SBS89", "SBS90", 311 "SBS24", "SBS25", "SBS26", "SBS27", "SBS28", "SBS29", "SBS30", "SBS31", "SBS32", "SBS33", "SBS34",
302 "SBS91", "SBS92", "SBS93", "SBS94") 312 "SBS35", "SBS36", "SBS37", "SBS38", "SBS39", "SBS40", "SBS41", "SBS42", "SBS43", "SBS44", "SBS45",
313 "SBS46", "SBS47", "SBS48", "SBS49", "SBS50", "SBS51", "SBS52", "SBS53", "SBS54", "SBS55", "SBS56",
314 "SBS57", "SBS58", "SBS59", "SBS60", "SBS84", "SBS85", "SBS86", "SBS87", "SBS88", "SBS89", "SBS90",
315 "SBS91", "SBS92", "SBS93", "SBS94"
316 )
303 317
304 # if signature names provided are not compliant with cosmic nomenclature, 318 # if signature names provided are not compliant with cosmic nomenclature,
305 # we attribute these names to the active signature_colors vector, adjusted to the length 319 # we attribute these names to the active signature_colors vector, adjusted to the length
306 # of the signature names. 320 # of the signature names.
307 321
308 if (! all(colnames(sbs_signatures) %in% names(signature_colors))) { # provided signature are not all included in cosmic names 322 if (!all(colnames(sbs_signatures) %in% names(signature_colors))) { # provided signature are not all included in cosmic names
309 signature_colors <- signature_colors[seq_along(sbs_signatures)] 323 signature_colors <- signature_colors[seq_along(sbs_signatures)]
310 names(signature_colors) <- colnames(sbs_signatures) 324 names(signature_colors) <- colnames(sbs_signatures)
311 } 325 }
312 326
313 # This is IMPORTANT since in Galaxy we do not use the embeded function get_known_signatures() 327 # This is IMPORTANT since in Galaxy we do not use the embeded function get_known_signatures()
318 332
319 pdf(opt$output_sigpattern, paper = "special", width = 11.69, height = 11.69) 333 pdf(opt$output_sigpattern, paper = "special", width = 11.69, height = 11.69)
320 if (opt$display_signatures == "yes") { 334 if (opt$display_signatures == "yes") {
321 for (i in head(seq(1, ncol(sbs_signatures), by = 20), -1)) { 335 for (i in head(seq(1, ncol(sbs_signatures), by = 20), -1)) {
322 p6 <- plot_96_profile(sbs_signatures[, i:(i + 19)], condensed = TRUE, ymax = 0.3) 336 p6 <- plot_96_profile(sbs_signatures[, i:(i + 19)], condensed = TRUE, ymax = 0.3)
323 grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc((i + 1) / 20) + 1, " of ", 337 grid.arrange(p6, top = textGrob(
324 trunc(ncol(sbs_signatures) / 20) + 1, " pages)"), 338 paste0(
325 gp = gpar(fontsize = 12, font = 3))) 339 tag, " profiles (", trunc((i + 1) / 20) + 1, " of ",
340 trunc(ncol(sbs_signatures) / 20) + 1, " pages)"
341 ),
342 gp = gpar(fontsize = 12, font = 3)
343 ))
326 } 344 }
327 p6 <- plot_96_profile(sbs_signatures[, (trunc(ncol(sbs_signatures) / 20) * 20):(ncol(sbs_signatures))], 345 p6 <- plot_96_profile(sbs_signatures[, (trunc(ncol(sbs_signatures) / 20) * 20):(ncol(sbs_signatures))],
328 condensed = TRUE, ymax = 0.3) 346 condensed = TRUE, ymax = 0.3
329 grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ", 347 )
330 trunc(ncol(sbs_signatures) / 20) + 1, " pages)"), 348 grid.arrange(p6, top = textGrob(
331 gp = gpar(fontsize = 12, font = 3))) 349 paste0(
350 tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ",
351 trunc(ncol(sbs_signatures) / 20) + 1, " pages)"
352 ),
353 gp = gpar(fontsize = 12, font = 3)
354 ))
332 } 355 }
333 356
334 357
335 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles 358 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles
336 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix 359 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix
337 fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures) 360 fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures)
338 361
339 # Plot contribution barplots 362 # Plot contribution barplots
340 pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute") 363 pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute")
341 pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative") 364 pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative")
342 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs 365 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs
343 pc3_data <- pc3$data 366 pc3_data <- pc3$data
344 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + 367 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
345 geom_bar(stat = "identity", position = "stack") + 368 geom_bar(stat = "identity", position = "stack") +
346 coord_flip() + 369 coord_flip() +
347 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + 370 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
348 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + 371 labs(x = "Samples", y = "Absolute contribution") +
349 theme(panel.grid.minor.x = element_blank(), 372 theme_bw() +
350 panel.grid.major.x = element_blank(), 373 theme(
351 legend.position = "right", 374 panel.grid.minor.x = element_blank(),
352 text = element_text(size = 8), 375 panel.grid.major.x = element_blank(),
353 axis.text.x = element_text(angle = 90, hjust = 1)) 376 legend.position = "right",
377 text = element_text(size = 8),
378 axis.text.x = element_text(angle = 90, hjust = 1)
379 )
354 pc4_data <- pc4$data 380 pc4_data <- pc4$data
355 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + 381 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
356 geom_bar(stat = "identity", position = "fill") + 382 geom_bar(stat = "identity", position = "fill") +
357 coord_flip() + 383 coord_flip() +
358 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + 384 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
359 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + 385 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
360 labs(x = "Samples", y = "Relative contribution") + theme_bw() + 386 labs(x = "Samples", y = "Relative contribution") +
361 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right", 387 theme_bw() +
362 text = element_text(size = 8), 388 theme(
363 axis.text.x = element_text(angle = 90, hjust = 1)) 389 panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right",
390 text = element_text(size = 8),
391 axis.text.x = element_text(angle = 90, hjust = 1)
392 )
364 } 393 }
365 ##### 394 #####
366 # ggplot2 alternative 395 # ggplot2 alternative
367 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs 396 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs
368 pc3_data <- pc3$data 397 pc3_data <- pc3$data
369 pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") 398 pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier")
370 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + 399 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
371 geom_bar(stat = "identity", position = "stack") + 400 geom_bar(stat = "identity", position = "stack") +
372 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + 401 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
373 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + 402 labs(x = "Samples", y = "Absolute contribution") +
374 theme(panel.grid.minor.x = element_blank(), 403 theme_bw() +
375 panel.grid.major.x = element_blank(), 404 theme(
376 legend.position = "right", 405 panel.grid.minor.x = element_blank(),
377 text = element_text(size = 8), 406 panel.grid.major.x = element_blank(),
378 axis.text.x = element_text(angle = 90, hjust = 1)) + 407 legend.position = "right",
379 facet_grid(~level, scales = "free_x", space = "free") 408 text = element_text(size = 8),
409 axis.text.x = element_text(angle = 90, hjust = 1)
410 ) +
411 facet_grid(~level, scales = "free_x", space = "free")
380 pc4_data <- pc4$data 412 pc4_data <- pc4$data
381 pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") 413 pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier")
382 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + 414 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
383 geom_bar(stat = "identity", position = "fill") + 415 geom_bar(stat = "identity", position = "fill") +
384 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + 416 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) +
385 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + 417 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
386 labs(x = "Samples", y = "Relative contribution") + theme_bw() + 418 labs(x = "Samples", y = "Relative contribution") +
387 theme(panel.grid.minor.x = element_blank(), 419 theme_bw() +
388 panel.grid.major.x = element_blank(), 420 theme(
389 legend.position = "right", 421 panel.grid.minor.x = element_blank(),
390 text = element_text(size = 8), 422 panel.grid.major.x = element_blank(),
391 axis.text.x = element_text(angle = 90, hjust = 1)) + 423 legend.position = "right",
392 facet_grid(~level, scales = "free_x", space = "free") 424 text = element_text(size = 8),
425 axis.text.x = element_text(angle = 90, hjust = 1)
426 ) +
427 facet_grid(~level, scales = "free_x", space = "free")
393 } 428 }
394 # Combine the two plots: 429 # Combine the two plots:
395 grid.arrange(pc3, pc4, 430 grid.arrange(pc3, pc4,
396 top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles", 431 top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles",
397 gp = gpar(fontsize = 12, font = 3))) 432 gp = gpar(fontsize = 12, font = 3)
433 )
434 )
398 435
399 #### pie charts of comic signatures contributions in samples ### 436 #### pie charts of comic signatures contributions in samples ###
400 library(reshape2) 437 library(reshape2)
401 library(dplyr) 438 library(dplyr)
402 if (length(levels(factor(levels_table$level))) < 2) { 439 if (length(levels(factor(levels_table$level))) < 2) {
403 fit_res_contrib <- as.data.frame(fit_res$contribution) 440 fit_res_contrib <- as.data.frame(fit_res$contribution)
404 worklist <- cbind(signature = rownames(fit_res$contribution), 441 worklist <- cbind(
405 level = rep("nolabels", length(fit_res_contrib[, 1])), 442 signature = rownames(fit_res$contribution),
406 fit_res_contrib, 443 level = rep("nolabels", length(fit_res_contrib[, 1])),
407 sum = rowSums(fit_res_contrib)) 444 fit_res_contrib,
445 sum = rowSums(fit_res_contrib)
446 )
408 worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ] 447 worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ]
409 worklist <- worklist[1:opt$signum, ] 448 worklist <- worklist[1:opt$signum, ]
410 worklist <- worklist[, -length(worklist[1, ])] 449 worklist <- worklist[, -length(worklist[1, ])]
411 worklist <- melt(worklist) 450 worklist <- melt(worklist)
412 worklist <- worklist[, c(1, 3, 4, 2)] 451 worklist <- worklist[, c(1, 3, 4, 2)]
413 } else { 452 } else {
414 worklist <- list() 453 worklist <- list()
415 for (i in levels(factor(levels_table$level))) { 454 for (i in levels(factor(levels_table$level))) {
416 worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] 455 worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]]
417 sum <- rowSums(as.data.frame(worklist[[i]])) 456 sum <- rowSums(as.data.frame(worklist[[i]]))
418 worklist[[i]] <- cbind(worklist[[i]], sum) 457 worklist[[i]] <- cbind(worklist[[i]], sum)
419 worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ] 458 worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ]
420 worklist[[i]] <- worklist[[i]][1:opt$signum, ] 459 worklist[[i]] <- worklist[[i]][1:opt$signum, ]
421 worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] 460 worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))]
422 } 461 }
423 worklist <- as.data.frame(melt(worklist)) 462 worklist <- as.data.frame(melt(worklist))
424 worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2]) 463 worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2])
425 } 464 }
426 465
427 colnames(worklist) <- c("signature", "sample", "value", "level") 466 colnames(worklist) <- c("signature", "sample", "value", "level")
428 worklist <- as.data.frame(worklist %>% group_by(sample) %>% mutate(value = value / sum(value) * 100)) 467 worklist <- as.data.frame(worklist %>% group_by(sample) %>% mutate(value = value / sum(value) * 100))
429 worklist$pos <- cumsum(worklist$value) - worklist$value / 2 468 worklist$pos <- cumsum(worklist$value) - worklist$value / 2
430 worklist$label <- factor(gsub("SBS", "", worklist$signature)) 469 worklist$label <- factor(gsub("SBS", "", worklist$signature))
431 worklist$signature <- factor(worklist$signature) 470 worklist$signature <- factor(worklist$signature)
432 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + 471 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) +
433 geom_bar(width = 1, stat = "identity") + 472 geom_bar(width = 1, stat = "identity") +
434 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + 473 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) +
435 coord_polar("y", start = 0) + facet_wrap(. ~ sample) + 474 coord_polar("y", start = 0) +
436 labs(x = "", y = "Samples", fill = tag) + 475 facet_wrap(. ~ sample) +
437 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), 476 labs(x = "", y = "Samples", fill = tag) +
438 values = signature_colors[levels(worklist$signature)], 477 scale_fill_manual(
439 labels = names(signature_colors[levels(worklist$signature)])) + 478 name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"),
440 theme(axis.text = element_blank(), 479 values = signature_colors[levels(worklist$signature)],
441 axis.ticks = element_blank(), 480 labels = names(signature_colors[levels(worklist$signature)])
442 panel.grid = element_blank()) 481 ) +
482 theme(
483 axis.text = element_blank(),
484 axis.ticks = element_blank(),
485 panel.grid = element_blank()
486 )
443 grid.arrange(p7) 487 grid.arrange(p7)
444 488
445 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering 489 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering
446 if (length(vcf_paths) > 1) { 490 if (length(vcf_paths) > 1) {
447 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") 491 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete")
450 494
451 # export relative contribution matrix 495 # export relative contribution matrix
452 if (!is.na(opt$sig_contrib_matrix)) { 496 if (!is.na(opt$sig_contrib_matrix)) {
453 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution)) 497 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution))
454 if (length(levels(factor(levels_table$level))) > 1) { 498 if (length(levels(factor(levels_table$level))) > 1) {
455 output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution), 499 output_table <- data.frame(
456 3], "-", colnames(fit_res$contribution)), 500 sample = paste0(metadata_table[
457 output_table) 501 metadata_table$element_identifier == colnames(fit_res$contribution),
502 3
503 ], "-", colnames(fit_res$contribution)),
504 output_table
505 )
458 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) 506 colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
459 } else { 507 } else {
460 output_table <- data.frame(sample = rownames(output_table), output_table) 508 output_table <- data.frame(sample = rownames(output_table), output_table)
461 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) 509 colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
462 } 510 }
463 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = FALSE, row.names = FALSE) 511 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = FALSE, row.names = FALSE)
464 } 512 }
478 # Adjust data frame for plotting with gpplot 526 # Adjust data frame for plotting with gpplot
479 colnames(cos_sim_ori_rec) <- "cos_sim" 527 colnames(cos_sim_ori_rec) <- "cos_sim"
480 cos_sim_ori_rec$sample <- row.names(cos_sim_ori_rec) 528 cos_sim_ori_rec$sample <- row.names(cos_sim_ori_rec)
481 # Make barplot 529 # Make barplot
482 p9 <- ggplot(cos_sim_ori_rec, aes(y = cos_sim, x = sample)) + 530 p9 <- ggplot(cos_sim_ori_rec, aes(y = cos_sim, x = sample)) +
483 geom_bar(stat = "identity", fill = "skyblue4") + 531 geom_bar(stat = "identity", fill = "skyblue4") +
484 coord_cartesian(ylim = c(0.8, 1)) + 532 coord_cartesian(ylim = c(0.8, 1)) +
485 # coord_flip(ylim=c(0.8,1)) + 533 # coord_flip(ylim=c(0.8,1)) +
486 ylab("Cosine similarity\n original VS reconstructed") + 534 ylab("Cosine similarity\n original VS reconstructed") +
487 xlab("") + 535 xlab("") +
488 # Reverse order of the samples such that first is up 536 # Reverse order of the samples such that first is up
489 # xlim(rev(levels(factor(cos_sim_ori_rec$sample)))) + 537 # xlim(rev(levels(factor(cos_sim_ori_rec$sample)))) +
490 theme_bw() + 538 theme_bw() +
491 theme(panel.grid.minor.y = element_blank(), 539 theme(
492 panel.grid.major.y = element_blank(), 540 panel.grid.minor.y = element_blank(),
493 axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1) 541 panel.grid.major.y = element_blank(),
494 ) + 542 axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
495 # Add cut.off line 543 ) +
496 geom_hline(aes(yintercept = .95)) 544 # Add cut.off line
545 geom_hline(aes(yintercept = .95))
497 grid.arrange(p9, top = textGrob("Similarity between true profiles and profiles reconstructed with elementary signatures", gp = gpar(fontsize = 12, font = 3))) 546 grid.arrange(p9, top = textGrob("Similarity between true profiles and profiles reconstructed with elementary signatures", gp = gpar(fontsize = 12, font = 3)))
498 dev.off() 547 dev.off()
499 } 548 }
500 549
501 550
502 # Output RData file 551 # Output RData file
503 if (!is.null(opt$rdata)) { 552 if (!is.null(opt$rdata)) {
504 save.image(file = opt$rdata) 553 save.image(file = opt$rdata)
505 } 554 }