comparison mutational_patterns.R @ 25:b00fef2b1c2c draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit eeb46055822c6805c209af0c450ae941100960bd
author artbio
date Wed, 06 Jul 2022 11:43:09 +0000
parents ca6c19ee7da0
children af5c65ad5317
comparison
equal deleted inserted replaced
24:ca6c19ee7da0 25:b00fef2b1c2c
67 make_option( 67 make_option(
68 "--newsignum", 68 "--newsignum",
69 default = 2, 69 default = 2,
70 type = "integer", 70 type = "integer",
71 help = "Number of new signatures to be captured" 71 help = "Number of new signatures to be captured"
72 ),
73 make_option(
74 "--cosmic_id_threshold",
75 default = 0.85,
76 type = "double",
77 help = "minimum cosine similarity to rename a new signature according to cosmic v3.2"
72 ), 78 ),
73 make_option( 79 make_option(
74 "--output_spectrum", 80 "--output_spectrum",
75 default = NA, 81 default = NA,
76 type = "character", 82 type = "character",
203 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter 209 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter
204 # to achieve stability and avoid local minima) 210 # to achieve stability and avoid local minima)
205 nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun) 211 nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun)
206 # Assign signature COSMICv3.2 names 212 # Assign signature COSMICv3.2 names
207 cosmic_signatures <- get_known_signatures() 213 cosmic_signatures <- get_known_signatures()
208 nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = 0.85) 214 nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = opt$cosmic_id_threshold)
209 sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures) 215 sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures)
210 plot_cosine_sim <- plot_cosine_heatmap(sim_matrix) 216 plot_cosine_sim <- plot_cosine_heatmap(sim_matrix)
211 grid.arrange(plot_cosine_sim) 217 grid.arrange(plot_cosine_sim)
212 # Plot the 96-profile of the signatures: 218 # Plot the 96-profile of the signatures:
213 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) 219 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE)
220 grid.arrange(p5)
221 # write matrix of de novo signatures for user
214 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") 222 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq")
215 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) 223 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE)
216 newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE), 224 newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE),
217 "[", new_sig_matrix$substitution, "]", 225 "[", new_sig_matrix$substitution, "]",
218 gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)) 226 gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE))
219 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) 227 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]])
220 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") 228 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t")
221 grid.arrange(p5)
222 # Visualize the contribution of the signatures in a barplot 229 # Visualize the contribution of the signatures in a barplot
223 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE) 230 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE)
224 # Visualize the contribution of the signatures in absolute number of mutations 231 # Visualize the contribution of the signatures in absolute number of mutations
225 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "absolute", coord_flip = TRUE) 232 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "absolute", coord_flip = TRUE)
226 # Combine the two plots: 233 # Combine the two plots:
229 # The relative contribution of each signature for each sample can also be plotted as a heatmap with 236 # The relative contribution of each signature for each sample can also be plotted as a heatmap with
230 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots. 237 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots.
231 # The samples can be hierarchically clustered based on their euclidean distance. The signatures 238 # The samples can be hierarchically clustered based on their euclidean distance. The signatures
232 # can be plotted in a user-specified order. 239 # can be plotted in a user-specified order.
233 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: 240 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order:
234 pch1 <- plot_contribution_heatmap(nmf_res$contribution, 241 pch1 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = TRUE)
235 sig_order = paste0("NewSig_", 1:opt$newsignum))
236 # Plot signature contribution as a heatmap without sample clustering: 242 # Plot signature contribution as a heatmap without sample clustering:
237 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE) 243 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE)
238 #Combine the plots into one figure: 244 #Combine the plots into one figure:
239 grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6)) 245 grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6))
240 246
241 # Compare the reconstructed mutational profile with the original mutational profile: 247 # Compare the reconstructed mutational profile with the original mutational profile:
242 plot_compare_profiles(pseudo_mut_mat[, 1], 248 pch3 <- plot_original_vs_reconstructed(pseudo_mut_mat, nmf_res$reconstructed, y_intercept = 0.95)
243 nmf_res$reconstructed[, 1], 249 grid.arrange(pch3)
244 profile_names = c("Original", "Reconstructed"),
245 condensed = TRUE)
246 dev.off() 250 dev.off()
247 } 251 }
248 252
249 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures #### 253 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures ####
250 254