comparison mutational_patterns.R @ 3:e332cf9dfa06 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit 0f7593f703ba0dfd12aea1c0460e371f08b57d2f"
author artbio
date Thu, 24 Sep 2020 01:32:52 +0000
parents aea952be68cb
children 7ba08c826888
comparison
equal deleted inserted replaced
2:aea952be68cb 3:e332cf9dfa06
27 default = NA, 27 default = NA,
28 type = 'character', 28 type = 'character',
29 help = "path to the tab separated file describing the levels in function of datasets" 29 help = "path to the tab separated file describing the levels in function of datasets"
30 ), 30 ),
31 make_option( 31 make_option(
32 "--cosmic_version",
33 default = "v2",
34 type = 'character',
35 help = "Version of the Cosmic Signature set to be used to express mutational patterns"
36 ),
37 make_option(
32 "--signum", 38 "--signum",
33 default = 2, 39 default = 2,
34 type = 'integer', 40 type = 'integer',
35 help = "selects the N most significant signatures in samples to express mutational patterns" 41 help = "selects the N most significant signatures in samples to express mutational patterns"
36 ), 42 ),
50 "--newsignum", 56 "--newsignum",
51 default = 2, 57 default = 2,
52 type = 'integer', 58 type = 'integer',
53 help = "Number of new signatures to be captured" 59 help = "Number of new signatures to be captured"
54 ), 60 ),
55
56 make_option( 61 make_option(
57 "--output_spectrum", 62 "--output_spectrum",
58 default = NA, 63 default = NA,
59 type = 'character', 64 type = 'character',
60 help = "path to output dataset" 65 help = "path to output dataset"
64 default = NA, 69 default = NA,
65 type = 'character', 70 type = 'character',
66 help = "path to output dataset" 71 help = "path to output dataset"
67 ), 72 ),
68 make_option( 73 make_option(
74 "--sigmatrix",
75 default = NA,
76 type = 'character',
77 help = "path to signature matrix"
78 ),
79 make_option(
69 "--output_cosmic", 80 "--output_cosmic",
70 default = NA, 81 default = NA,
71 type = 'character', 82 type = 'character',
72 help = "path to output dataset" 83 help = "path to output dataset"
73 ) 84 ),
85 make_option(
86 c("-r", "--rdata"),
87 type="character",
88 default=NULL,
89 help="Path to RData output file")
74 ) 90 )
75 91
76 opt = parse_args(OptionParser(option_list = option_list), 92 opt = parse_args(OptionParser(option_list = option_list),
77 args = commandArgs(trailingOnly = TRUE)) 93 args = commandArgs(trailingOnly = TRUE))
78 94
79 ################ Manage input data #################### 95 ################ Manage input data ####################
80 json_dict <- opt$inputs 96 json_dict <- opt$inputs
81 parser <- newJSONParser() 97 parser <- newJSONParser()
82 parser$addData(json_dict) 98 parser$addData(json_dict)
83 fileslist <- parser$getObject() 99 fileslist <- parser$getObject()
84 vcf_files <- attr(fileslist, "names") 100 vcf_paths <- attr(fileslist, "names")
85 sample_names <- unname(unlist(fileslist)) 101 element_identifiers <- unname(unlist(fileslist))
86 ref_genome <- opt$genome 102 ref_genome <- opt$genome
87 vcf_table <- data.frame(sample_name=sample_names, path=vcf_files) 103 vcf_table <- data.frame(element_identifier=element_identifiers, path=vcf_paths)
88 104
89 library(MutationalPatterns) 105 library(MutationalPatterns)
90 library(ref_genome, character.only = TRUE) 106 library(ref_genome, character.only = TRUE)
107 library(ggplot2)
91 108
92 # Load the VCF files into a GRangesList: 109 # Load the VCF files into a GRangesList:
93 vcfs <- read_vcfs_as_granges(vcf_files, sample_names, ref_genome) 110 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome)
94 if (!is.na(opt$levels)[1]) { # manage levels if there are 111 if (!is.na(opt$levels)[1]) { # manage levels if there are
95 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("sample_name","level")) 112 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("element_identifier","level"))
96 library(dplyr) 113 library(plyr)
97 metadata_table <- left_join(vcf_table, levels_table, by = "sample_name") 114 metadata_table <- join(vcf_table, levels_table, by = "element_identifier")
98 # detach("package:dplyr", unload=TRUE) 115 tissue <- as.vector(metadata_table$level)
99 tissue <- metadata_table$level
100 } 116 }
101 117
102 ##### This is done for any section ###### 118 ##### This is done for any section ######
103 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) 119 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome)
104
105 120
106 ###### Section 1 Mutation characteristics and spectrums ############# 121 ###### Section 1 Mutation characteristics and spectrums #############
107 if (!is.na(opt$output_spectrum)[1]) { 122 if (!is.na(opt$output_spectrum)[1]) {
108 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) 123 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69)
109 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) 124 type_occurrences <- mut_type_occurrences(vcfs, ref_genome)
112 127
113 if (is.na(opt$levels)[1]) { 128 if (is.na(opt$levels)[1]) {
114 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE) 129 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE)
115 plot(p1) 130 plot(p1)
116 } else { 131 } else {
117 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE, legend=TRUE) # by sample 132 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE) # by levels
118 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total 133 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total
119 grid.arrange(p2, p3, ncol=2, widths=c(4,2.3), heights=c(4,1)) 134 grid.arrange(p2, p3, ncol=2, widths=c(4,2.3), heights=c(4,1))
120 } 135 }
121 plot_96_profile(mut_mat, condensed = TRUE) 136 plot_96_profile(mut_mat, condensed = TRUE)
122 dev.off() 137 dev.off()
123 } 138 }
124 139
125 ###### Section 2: De novo mutational signature extraction using NMF ####### 140 ###### Section 2: De novo mutational signature extraction using NMF #######
126 if (!is.na(opt$output_denovo)[1]) { 141 if (!is.na(opt$output_denovo)[1]) {
127 mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix 142 # opt$rank cannot be higher than the number of samples
143 if (opt$rank > length(element_identifiers)) {opt$rank <-length(element_identifiers)}
144 # likewise, opt$signum cannot be higher thant the number of samples
145 if (opt$signum > length(element_identifiers)) {opt$signum <-length(element_identifiers)}
146 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix
128 # Use the NMF package to generate an estimate rank plot 147 # Use the NMF package to generate an estimate rank plot
129 library("NMF") 148 library("NMF")
130 estimate <- nmf(mut_mat, rank=1:opt$rank+1, method="brunet", nrun=opt$nrun, seed=123456) 149 estimate <- nmf(pseudo_mut_mat, rank=1:opt$rank, method="brunet", nrun=opt$nrun, seed=123456)
131 # And plot it 150 # And plot it
132 pdf(opt$output_denovo, paper = "special", width = 11.69, height = 11.69) 151 pdf(opt$output_denovo, paper = "special", width = 11.69, height = 11.69)
133 p.trans <- plot(estimate) 152 p4 <- plot(estimate)
134 grid.arrange(p.trans) 153 grid.arrange(p4)
135 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures 154 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures
136 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter 155 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter
137 # to achieve stability and avoid local minima) 156 # to achieve stability and avoid local minima)
138 nmf_res <- extract_signatures(mut_mat, rank=opt$rank, nrun=opt$nrun) 157 nmf_res <- extract_signatures(pseudo_mut_mat, rank=opt$newsignum, nrun=opt$nrun)
139 # Assign signature names 158 # Assign signature names
140 colnames(nmf_res$signatures) <- paste0("NewSig_", 1:opt$rank) 159 colnames(nmf_res$signatures) <- paste0("NewSig_", 1:opt$newsignum)
141 rownames(nmf_res$contribution) <- paste0("NewSig_", 1:opt$rank) 160 rownames(nmf_res$contribution) <- paste0("NewSig_", 1:opt$newsignum)
142 # Plot the 96-profile of the signatures: 161 # Plot the 96-profile of the signatures:
143 p.trans <- plot_96_profile(nmf_res$signatures, condensed = TRUE) 162 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE)
144 grid.arrange(p.trans) 163 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ variable, value.var = "value")
164 new_sig_matrix = format(new_sig_matrix, scientific=TRUE)
165 write.table(new_sig_matrix, file=opt$sigmatrix, quote = FALSE, row.names = FALSE, sep="\t")
166 grid.arrange(p5)
145 # Visualize the contribution of the signatures in a barplot 167 # Visualize the contribution of the signatures in a barplot
146 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="relative", coord_flip = TRUE) 168 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="relative", coord_flip = TRUE)
147 # Visualize the contribution of the signatures in absolute number of mutations 169 # Visualize the contribution of the signatures in absolute number of mutations
148 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="absolute", coord_flip = TRUE) 170 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="absolute", coord_flip = TRUE)
149 # Combine the two plots: 171 # Combine the two plots:
153 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots. 175 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots.
154 # The samples can be hierarchically clustered based on their euclidean dis- tance. The signatures 176 # The samples can be hierarchically clustered based on their euclidean dis- tance. The signatures
155 # can be plotted in a user-specified order. 177 # can be plotted in a user-specified order.
156 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: 178 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order:
157 pch1 <- plot_contribution_heatmap(nmf_res$contribution, 179 pch1 <- plot_contribution_heatmap(nmf_res$contribution,
158 sig_order = paste0("NewSig_", 1:opt$rank)) 180 sig_order = paste0("NewSig_", 1:opt$newsignum))
159 # Plot signature contribution as a heatmap without sample clustering: 181 # Plot signature contribution as a heatmap without sample clustering:
160 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples=FALSE) 182 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples=FALSE)
161 #Combine the plots into one figure: 183 #Combine the plots into one figure:
162 grid.arrange(pch1, pch2, ncol = 2, widths = c(2,1.6)) 184 grid.arrange(pch1, pch2, ncol = 2, widths = c(2,1.6))
163 185
164 # Compare the reconstructed mutational profile with the original mutational profile: 186 # Compare the reconstructed mutational profile with the original mutational profile:
165 plot_compare_profiles(mut_mat[,1], 187 plot_compare_profiles(pseudo_mut_mat[,1],
166 nmf_res$reconstructed[,1], 188 nmf_res$reconstructed[,1],
167 profile_names = c("Original", "Reconstructed"), 189 profile_names = c("Original", "Reconstructed"),
168 condensed = TRUE) 190 condensed = TRUE)
169 dev.off() 191 dev.off()
170 } 192 }
171 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### 193 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures ####
172 194
173 if (!is.na(opt$output_cosmic)[1]) { 195 if (!is.na(opt$output_cosmic)[1]) {
174 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) 196 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69)
175 sp_url <- paste("https://cancer.sanger.ac.uk/cancergenome/assets/", "signatures_probabilities.txt", sep = "") 197 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix
176 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) 198 if (opt$cosmic_version == "v2") {
177 mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix 199 sp_url <- paste("https://cancer.sanger.ac.uk/cancergenome/assets/", "signatures_probabilities.txt", sep = "")
178 new_order = match(row.names(mut_mat), cancer_signatures$Somatic.Mutation.Type) 200 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE)
179 cancer_signatures = cancer_signatures[as.vector(new_order),] 201 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type)
180 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type 202 cancer_signatures = cancer_signatures[as.vector(new_order),]
181 cancer_signatures = as.matrix(cancer_signatures[,4:33]) 203 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type
204 cancer_signatures = as.matrix(cancer_signatures[,4:33])
205 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels
206 cosmic_tag <- "Signatures (Cosmic v2, March 2015)"
207 } else {
208 sp_url <- "https://raw.githubusercontent.com/ARTbio/startbio/master/sigProfiler_SBS_signatures_2019_05_22.tsv"
209 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE)
210 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type)
211 cancer_signatures = cancer_signatures[as.vector(new_order),]
212 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type
213 cancer_signatures = as.matrix(cancer_signatures[,4:70])
214 colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels
215 cosmic_tag <- "Signatures (Cosmic v3, May 2019)"
216 }
182 217
183 # Plot mutational profiles of the COSMIC signatures 218 # Plot mutational profiles of the COSMIC signatures
184 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels 219 if (opt$cosmic_version == "v2") {
185 p.trans <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) 220 p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3)
186 grid.arrange(p.trans, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3))) 221 grid.arrange(p6, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3)))
222 } else {
223 print(length(cancer_signatures))
224 p6 <- plot_96_profile(cancer_signatures[,1:33], condensed = TRUE, ymax = 0.3)
225 p6bis <- plot_96_profile(cancer_signatures[,34:67], condensed = TRUE, ymax = 0.3)
226 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",gp=gpar(fontsize=12,font=3)))
227 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",gp=gpar(fontsize=12,font=3)))
228 }
187 229
188 # Hierarchically cluster the COSMIC signatures based on their similarity with average linkage 230 # Hierarchically cluster the COSMIC signatures based on their similarity with average linkage
189 # hclust_cosmic = cluster_signatures(cancer_signatures, method = "average") 231 # hclust_cosmic = cluster_signatures(cancer_signatures, method = "average")
190 # store signatures in new order 232 # store signatures in new order
191 # cosmic_order = colnames(cancer_signatures)[hclust_cosmic$order] 233 # cosmic_order = colnames(cancer_signatures)[hclust_cosmic$order]
209 # Plot heatmap with specified signature order 251 # Plot heatmap with specified signature order
210 # p.trans <- plot_cosine_heatmap(cos_sim_samples_signatures, col_order = cosmic_order, cluster_rows = TRUE) 252 # p.trans <- plot_cosine_heatmap(cos_sim_samples_signatures, col_order = cosmic_order, cluster_rows = TRUE)
211 # grid.arrange(p.trans) 253 # grid.arrange(p.trans)
212 254
213 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles 255 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles
214 fit_res <- fit_to_signatures(mut_mat, cancer_signatures) 256 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures)
215 257
216 # Select signatures with some contribution (above a threshold) 258 # Select signatures with some contribution (above a threshold)
217 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] 259 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1]
218 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded 260 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded
219 # Plot contribution barplots 261 # Plot contribution barplots
220 pc1 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "absolute") 262 pc3 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "absolute")
221 pc2 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "relative") 263 pc4 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "relative")
264 #####
265 # ggplot2 alternative
266 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs
267 pc3_data <- pc3$data
268 pc3_data <- merge (pc3_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier")
269 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) +
270 geom_bar(stat="identity", position='stack') +
271 scale_fill_discrete(name="Cosmic\nSignature") +
272 labs(x = "Samples", y = "Absolute contribution") + theme_bw() +
273 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) +
274 facet_grid(~level, scales = "free_x")
275 pc4_data <- pc4$data
276 pc4_data <- merge (pc4_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier")
277 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) +
278 geom_bar(stat="identity", position='fill') +
279 scale_fill_discrete(name="Cosmic\nSignature") +
280 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
281 labs(x = "Samples", y = "Relative contribution") + theme_bw() +
282 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) +
283 facet_grid(~level, scales = "free_x")
284 }
222 # Combine the two plots: 285 # Combine the two plots:
223 grid.arrange(pc1, pc2) 286 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3)))
224 ## pie charts of comic signatures contributions in samples 287 ## pie charts of comic signatures contributions in samples
225 288
226 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,]))) 289 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,])))
227 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie)) 290 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie))
228 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100 291 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100
229 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents) 292 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents)
230 library(reshape2) 293 library(reshape2)
231 melted_sig_data_pie_percents <-melt(data=sig_data_pie_percents) 294 melted_sig_data_pie_percents <-melt(data=sig_data_pie_percents)
232 melted_sig_data_pie_percents$label <- sub("Sig.", "", melted_sig_data_pie_percents$variable) 295 melted_sig_data_pie_percents$label <- sub("Sig.", "", melted_sig_data_pie_percents$variable)
233 melted_sig_data_pie_percents$pos <- cumsum(melted_sig_data_pie_percents$value) - melted_sig_data_pie_percents$value/2 296 melted_sig_data_pie_percents$pos <- cumsum(melted_sig_data_pie_percents$value) - melted_sig_data_pie_percents$value/2
234 library(ggplot2) 297 p7 <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) +
235 p.trans <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) +
236 geom_bar(width = 1, stat = "identity") + 298 geom_bar(width = 1, stat = "identity") +
237 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + 299 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) +
238 coord_polar("y", start=0) + facet_wrap(~ sample) + 300 coord_polar("y", start=0) + facet_wrap(~ sample) +
239 labs(x="", y="Samples", fill = "Signatures (Cosmic_v2,March 2015)") + 301 labs(x="", y="Samples", fill = cosmic_tag) +
240 theme(axis.text = element_blank(), 302 theme(axis.text = element_blank(),
241 axis.ticks = element_blank(), 303 axis.ticks = element_blank(),
242 panel.grid = element_blank()) 304 panel.grid = element_blank())
243 grid.arrange(p.trans) 305 grid.arrange(p7)
244 306
245 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering 307 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering
246 p.trans <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") 308 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete")
247 grid.arrange(p.trans) 309 grid.arrange(p8)
248 310
249 # Compare the reconstructed mutational profile of sample 1 with its original mutational profile 311 # Compare the reconstructed mutational profile of sample 1 with its original mutational profile
250 # plot_compare_profiles(mut_mat[,1], fit_res$reconstructed[,1], 312 # plot_compare_profiles(mut_mat[,1], fit_res$reconstructed[,1],
251 # profile_names = c("Original", "Reconstructed"), 313 # profile_names = c("Original", "Reconstructed"),
252 # condensed = TRUE) 314 # condensed = TRUE)
253 315
254 # Calculate the cosine similarity between all original and reconstructed mutational profiles with 316 # Calculate the cosine similarity between all original and reconstructed mutational profiles with
255 # `cos_sim_matrix` 317 # `cos_sim_matrix`
256 318
257 # calculate all pairwise cosine similarities 319 # calculate all pairwise cosine similarities
258 cos_sim_ori_rec <- cos_sim_matrix(mut_mat, fit_res$reconstructed) 320 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed)
259 # extract cosine similarities per sample between original and reconstructed 321 # extract cosine similarities per sample between original and reconstructed
260 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec)) 322 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec))
261 323
262 # We can use ggplot to make a barplot of the cosine similarities between the original and 324 # We can use ggplot to make a barplot of the cosine similarities between the original and
263 # reconstructed mutational profile of each sample. This clearly shows how well each mutational 325 # reconstructed mutational profile of each sample. This clearly shows how well each mutational
268 330
269 # Adjust data frame for plotting with gpplot 331 # Adjust data frame for plotting with gpplot
270 colnames(cos_sim_ori_rec) = "cos_sim" 332 colnames(cos_sim_ori_rec) = "cos_sim"
271 cos_sim_ori_rec$sample = row.names(cos_sim_ori_rec) 333 cos_sim_ori_rec$sample = row.names(cos_sim_ori_rec)
272 # Make barplot 334 # Make barplot
273 p.trans <- ggplot(cos_sim_ori_rec, aes(y=cos_sim, x=sample)) + 335 p9 <- ggplot(cos_sim_ori_rec, aes(y=cos_sim, x=sample)) +
274 geom_bar(stat="identity", fill = "skyblue4") + 336 geom_bar(stat="identity", fill = "skyblue4") +
275 coord_cartesian(ylim=c(0.8, 1)) + 337 coord_cartesian(ylim=c(0.8, 1)) +
276 # coord_flip(ylim=c(0.8,1)) + 338 # coord_flip(ylim=c(0.8,1)) +
277 ylab("Cosine similarity\n original VS reconstructed") + 339 ylab("Cosine similarity\n original VS reconstructed") +
278 xlab("") + 340 xlab("") +
281 theme_bw() + 343 theme_bw() +
282 theme(panel.grid.minor.y=element_blank(), 344 theme(panel.grid.minor.y=element_blank(),
283 panel.grid.major.y=element_blank()) + 345 panel.grid.major.y=element_blank()) +
284 # Add cut.off line 346 # Add cut.off line
285 geom_hline(aes(yintercept=.95)) 347 geom_hline(aes(yintercept=.95))
286 grid.arrange(p.trans, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)",gp=gpar(fontsize=12,font=3))) 348 grid.arrange(p9, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)",gp=gpar(fontsize=12,font=3)))
287
288 dev.off() 349 dev.off()
289 } 350 }
351
352
353 # Output RData file
354 if (!is.null(opt$rdata)) {
355 save.image(file=opt$rdata)
356 }