7
|
1 #!/usr/bin/Rscript
|
|
2
|
|
3 #-----------------------------------#
|
|
4 # Author: Maude #
|
|
5 # Script: somaticSignature_Galaxy.r #
|
|
6 # Last update: 17/02/17 #
|
|
7 #-----------------------------------#
|
|
8
|
|
9
|
|
10 #########################################################################################################################################
|
|
11 # Run NMF algorithm and represent the composition of somatic signatures and the contribution in each samples #
|
|
12 #########################################################################################################################################
|
|
13
|
|
14 #-------------------------------------------------------------------------------
|
|
15 # Load library for recovering the arguments
|
|
16 #-------------------------------------------------------------------------------
|
|
17 suppressMessages(suppressWarnings(require("getopt")))
|
|
18
|
|
19
|
|
20 #-------------------------------------------------------------------------------
|
|
21 # Recover the arguments
|
|
22 #-------------------------------------------------------------------------------
|
|
# Command-line specification handed to getopt(); one row per option with the
# columns: long flag, short flag, argument mask, value type.
# NOTE(review): "html" is declared with mask 0 although the usage text shows
# it taking a path -- confirm against the getopt version used by the wrapper.
spec <- matrix(
  c(
    "input",       "i",      1, "character",
    "nbSignature", "nbSign", 1, "integer",
    "cpu",         "cpu",    1, "integer",
    "output",      "o",      1, "character",
    "html",        "html",   0, "character",
    "help",        "h",      0, "logical"
  ),
  ncol = 4, byrow = TRUE
)

opt <- getopt(spec)

# Messages shared by the two early-exit paths below
usage_line <- "Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir> --html <html_for_Galaxy>\n"
option_details <- "\n--input Input matrix created with the tool MutSpec-Stat\n--nbSignature Number of signatures to extract (min = 2)\n--cpu Number of CPUs\n--output Output directory\n--html Path to HTML page (ONLY FOR GALAXY WRAPPER)\n"

# No argument was passed on the command line: print the full help and quit
if (length(opt) == 1) {
  cat(usage_line)
  cat(option_details)
  q(status = 1)
}

# Help was explicitly requested: print the short usage and quit with a
# non-zero status
if (!is.null(opt$help)) {
  cat(usage_line)
  q(status = 1)
}
|
|
53
|
|
54
|
|
55
|
|
56 #-------------------------------------------------------------------------------
|
|
57 # Load library
|
|
58 #-------------------------------------------------------------------------------
|
|
59 suppressMessages(suppressWarnings(library(NMF)))
|
|
60 suppressMessages(suppressWarnings(library(ggplot2)))
|
|
61 suppressMessages(suppressWarnings(library(reshape)))
|
|
62 suppressMessages(suppressWarnings(library(grid)))
|
|
63 suppressMessages(suppressWarnings(library(scales))) # Set the maximum value to the y axis (graph composition somatic signature)
|
|
64 suppressMessages(suppressWarnings(library(gridExtra))) # function "unit"
|
|
65
|
|
66
|
|
67
|
|
68 ###############################################################################
|
|
69 # Load the functions #
|
|
70 ###############################################################################
|
|
71
|
|
72 #-------------------------------------------------------------------------------
|
|
73 # Set the font depending on X11 availability
|
|
74 #-------------------------------------------------------------------------------
|
|
# Choose the font family used by the custom ggplot2 theme:
# "Helvetica" when an X11 device is available, "Helvetica-Narrow" otherwise.
# The X11 capability is looked up by name rather than by position, so the
# choice does not depend on the ordering of the vector returned by
# capabilities(); a missing/NA entry falls back to the narrow font instead of
# aborting the script.
device <- capabilities()
font <- if (isTRUE(device[["X11"]])) "Helvetica" else "Helvetica-Narrow"
|
|
80
|
|
81 #-------------------------------------------------------------------------------
|
|
82 # My own theme
|
|
83 #-------------------------------------------------------------------------------
|
|
# Build the minimalist theme used for the signature composition plot.
#
# Starts from theme_grey() and replaces a handful of elements: thin black axis
# lines and ticks, no panel background/border/grid, compact legend keys and
# small facet strip text.
#
# base_size    Base font size forwarded to theme_grey().
# base_family  Base font family forwarded to theme_grey().
#
# The axis text family is read from the script-level variable `font`, not from
# `base_family`.
# NOTE(review): `axis.ticks.margin` and a unit-valued `legend.margin` are kept
# exactly as written; whether they are accepted depends on the installed
# ggplot2 version -- confirm before upgrading.
theme_custom <- function(base_size = 4, base_family = "")
{
  base_theme <- theme_grey(base_size = base_size, base_family = base_family)

  # Same stroke for the axis lines and the tick marks
  thin_black <- element_line(colour = "black", size = .2)
  # Same length for the ticks and the gap between tick and label
  tick_gap <- unit(.05, "cm")

  overrides <- theme(
    axis.text         = element_text(size = rel(0.8), family = font),
    axis.ticks        = thin_black,
    axis.line         = thin_black,
    axis.ticks.length = tick_gap,
    axis.ticks.margin = tick_gap,
    legend.key.size   = unit(.2, "cm"),
    legend.margin     = unit(-.5, "cm"),
    panel.background  = element_blank(),
    panel.border      = element_blank(),
    panel.grid.major  = element_blank(),
    panel.grid.minor  = element_blank(),
    strip.text.y      = element_text(size = 3)
  )

  base_theme %+replace% overrides
}
|
|
103
|
|
104 #-------------------------------------------------------------------------------
|
|
105 # Customize the theme for adding a y axis
|
|
106 #-------------------------------------------------------------------------------
|
|
# Theme actually applied to the composition plot: the custom theme with
# explicit per-axis line elements, where the x axis line is drawn in white so
# that only the y axis remains visible.
mytheme <- theme_custom()
mytheme$axis.line.y <- mytheme$axis.line
mytheme$axis.line.x <- mytheme$axis.line
mytheme$axis.line.x$colour <- 'white'
|
|
110
|
|
111 #-------------------------------------------------------------------------------
|
|
112 # Replace the signature number by alphabet letter
|
|
113 #-------------------------------------------------------------------------------
|
|
# Translate default signature labels into letters.
#
# "row1".."row26" and "col1".."col26" become "A".."Z"; any other value is
# returned untouched.
#
# c  Character label (a vector of labels is also accepted and converted
#    element-wise).
#
# Returns the converted label(s). Input without any convertible label is
# returned exactly as received.
ConvertNb2Aphabet <- function(c)
{
  labels <- as.character(c)

  # Only "row"/"col" followed by a number in 1..26 (no leading zero) qualifies;
  # grepl() yields FALSE for NA so missing values are simply left alone.
  convertible <- grepl("^(row|col)([1-9]|1[0-9]|2[0-6])$", labels)
  if (!any(convertible)) {
    return(c)
  }

  # Strip the prefix and use the remaining number as an index into LETTERS
  letter_index <- as.integer(sub("^(row|col)", "", labels[convertible]))
  labels[convertible] <- LETTERS[letter_index]
  labels
}
|
|
143
|
|
144 #-------------------------------------------------------------------------------
|
|
145 # Check the file doesn't have lines equal to zero
|
|
146 #-------------------------------------------------------------------------------
|
|
# Abort the script when a row of the input matrix sums to zero.
#
# rowsum     Sum of the row being checked.
# dataFrame  Input matrix; only its row names are used, for the error message.
# x          Index of the row being checked.
#
# Returns NULL invisibly when the row is fine; otherwise writes an explanation
# to stderr and raises an error.
CheckFile <- function(rowsum, dataFrame, x)
{
  # Rows holding at least one mutation need no further action
  if (rowsum != 0)
  {
    return(invisible(NULL))
  }

  offending_row <- rownames(dataFrame)[x]
  write("\n\nERROR: There is not enough mutations for running NMF!!!", stderr())
  write(paste0("Input matrix contains at least one null row ", offending_row, "\n\n"), stderr())
  stop()
}
|
|
156
|
|
157 #-------------------------------------------------------------------------------
|
|
158 # Contribution to Signature as the number of SBS per sample
|
|
159 #-------------------------------------------------------------------------------
|
|
# Convert a percentage contribution into a number of SBS.
#
# Total_SBS  Total number of SBS of the sample.
# Percent    Contribution of the signature, in percent.
#
# Returns Total_SBS * Percent / 100.
Contri2SignSBS <- function(Total_SBS, Percent)
{
  weighted <- Total_SBS * Percent
  weighted / 100
}
|
|
164
|
|
165 #-------------------------------------------------------------------------------
|
|
166 # Combined two plots and share the legend
|
|
167 #-------------------------------------------------------------------------------
|
|
# Stack several ggplot objects in one column and draw a single legend, taken
# from the first plot, underneath them.
#
# ...  ggplot objects, in the order they should be stacked.
grid_arrange_shared_legend <- function(...)
{
  plots <- list(...)

  # Render the first plot with its legend at the bottom, only to harvest the
  # grob named "guide-box"
  first_grobs <- ggplotGrob(plots[[1]] + theme(legend.position="bottom"))$grobs
  grob_names <- sapply(first_grobs, function(x) x$name)
  legend <- first_grobs[[which(grob_names == "guide-box")]]
  lheight <- sum(legend$height)

  # Every plot is drawn without its own legend
  bare_plots <- lapply(plots, function(x) x + theme(legend.position="none"))
  stacked <- do.call(arrangeGrob, bare_plots)

  # The plots take all the height left once the legend has been placed
  grid.arrange(
    stacked,
    legend,
    ncol = 1,
    heights = unit.c(unit(1, "npc") - lheight, lheight)
  )
}
|
|
181
|
|
182 #-------------------------------------------------------------------------------
|
|
183 # Calculate the mean of each signatures in each cluster
|
|
184 #-------------------------------------------------------------------------------
|
|
# Mean of each signature column of `df`, rounded to two decimals.
#
# df  Table whose first column is the cluster and whose columns
#     2..(nbSignature + 1) hold the signature contributions.
#
# The number of signature columns is read from the script-level option
# `opt$nbSignature`. Returns one mean per signature column.
meanCluster <- function(df)
{
  last_col <- opt$nbSignature+1
  sapply(2:last_col, function(col_idx) {
    # Columns may hold text, so convert explicitly before averaging
    contributions <- as.numeric(as.matrix(df[,col_idx]))
    round(mean(contributions), 2)
  })
}
|
|
190
|
|
191
|
|
192
|
|
193
|
|
194 ###############################################################################
|
|
195 # Check file #
|
|
196 ###############################################################################
|
|
197
|
|
198 # The input musn't contains lines equal to zero !!!
|
|
# Load the input matrix; its columns are used as samples further down.
matrixNMF <- read.table(opt$input, header = TRUE)

# NMF cannot run on a matrix that is empty or contains all-zero rows.
if (nrow(matrixNMF) == 0)
{
  stop("Input matrix ", opt$input, " contains no row", call. = FALSE)
}
# Row totals are computed once up front instead of once per checked row.
row_totals <- rowSums(matrixNMF)
for (row_idx in seq_len(nrow(matrixNMF)))
{
  CheckFile(row_totals[row_idx], matrixNMF, row_idx)
}
|
|
202
|
|
203
|
|
204
|
|
205 ###############################################################################
|
|
206 # Run NMF #
|
|
207 ###############################################################################
|
|
208 # Create outdir
|
|
# Output tree: <output>/NMF/Figures and <output>/NMF/Files
dir.create(opt$output)
output_NMF     <- paste0(opt$output, "/NMF")
output_Figures <- paste0(output_NMF, "/Figures")
output_Files   <- paste0(output_NMF, "/Files")
for (new_dir in c(output_NMF, output_Figures, output_Files))
{
  dir.create(new_dir)
}

# Text outputs
output_cluster         <- paste0(output_Files, "/Cluster_MixtureCoeff.txt")
output_matrixW         <- paste0(output_Files, "/MatrixW-Normto100.txt")
output_matrixW_ggplot2 <- paste0(output_Files, "/MatrixW-Inputggplot2.txt")
output_matrixH_ggplot2 <- paste0(output_Files, "/MatrixH-Inputggplot2.txt")
output_matrixH_cluster <- paste0(output_Files, "/Average_ContriByCluster.txt")

# Figures
figure_cluster         <- paste0(output_Figures, "/Heatmap_MixtureCoeff.png")
figure_matrixW_png     <- paste0(output_Figures, "/CompositionSomaticMutation.png")
figure_matrixH_png     <- paste0(output_Figures, "/ContributionMutationSignature.png")
figure_matrixH_cluster <- paste0(output_Figures, "/Average_ContriByCluster.png")
|
|
228
|
|
229
|
|
230 # Run NMF
|
|
231 # request a certain number of cores to use .opt="vP4"
|
|
# Run NMF (Brunet algorithm, 200 runs). The number of cores is requested
# through the NMF option string, e.g. "vP4" for four cores.
nbCPU <- paste0("vP", opt$cpu)
res <- nmf(matrixNMF, opt$nbSignature, "brunet", nrun = 200, .opt = nbCPU)

# The heatmap is only drawn for up to 300 samples; above that its creation
# returns an error.
if (ncol(matrixNMF) <= 300)
{
  graphics.off() # close any open graphics device
  options(bitmapType = 'cairo')
  png(figure_cluster)
  coefmap(res, Colv = "consensus")
  dev.off()
}

# Basis (W) and coefficient (H) matrices of the fit
matrixW <- basis(res)
matrixH <- coef(res)

# Cluster predicted for each sample, paired with the sample name
matrix_cluster <- cbind(
  Cluster = as.numeric(predict(res, what = "samples")),
  Samples = colnames(matrixNMF)
)

# Save the cluster table
write.table(matrix_cluster, file = output_cluster, quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
|
|
256
|
|
257
|
|
258
|
|
259 ###############################################################################
|
|
260 # Composition of somatic signatures #
|
|
261 ###############################################################################
|
|
262
|
|
263 # Normalize to 100%
|
|
# Normalize every signature (column of W) so that it sums to 100%
matrixW_norm <- t((t(matrixW) / colSums(matrixW)) * 100)
# Give the columns default names (col1, col2, ...) when they have none, then
# replace those defaults by letters
colnames(matrixW_norm) <- colnames(matrixW_norm, do.NULL = FALSE, prefix = "col")
colnames(matrixW_norm) <- vapply(colnames(matrixW_norm), ConvertNb2Aphabet, character(1), USE.NAMES = FALSE)

# Split each row name into the sequence context (characters 1 and 7) and the
# mutation type (characters 3 to 5). Done on the whole vector at once rather
# than by growing two vectors inside a loop.
mutation_names <- rownames(matrixW_norm)
if (is.null(mutation_names))
{
  stop("Matrix W has no row names: cannot recover the mutation type and sequence context", call. = FALSE)
}
context    <- paste0(substr(mutation_names, 1, 1), "_", substr(mutation_names, 7, 7))
alteration <- substr(mutation_names, 3, 5)

# Long format with the signatures as variable. Must be done before the
# annotation columns are bound to matrixW_norm below.
matrixW_melt <- melt(matrixW_norm)

# Wide table: annotation columns first, rows ordered by mutation type then
# context, to match the layout of the published signature matrices
matrixW_norm <- cbind(matrixW_norm, alteration, context)
matrixW_norm <- matrixW_norm[order(matrixW_norm[, "alteration"], matrixW_norm[, "context"]), ]
signature_cols <- seq_len(ncol(matrixW_norm) - 2)
matrixW_norm <- cbind(matrixW_norm[, c("alteration", "context")], matrixW_norm[, signature_cols])
write.table(matrixW_norm, file = output_matrixW, quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)

# Long table: annotate every row with its mutation type and context
matrixW_melt <- cbind(matrixW_melt, alteration)
matrixW_melt <- cbind(matrixW_melt, context)
colnames(matrixW_melt) <- c("", "Signature", "value", "alteration", "context")

# Save the ggplot2 input without its first (unnamed) column
input_ggplot2 <- as.matrix(matrixW_melt)
input_ggplot2 <- input_ggplot2[, 2:ncol(input_ggplot2)]
write.table(input_ggplot2, file = output_matrixW_ggplot2, quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)

# Maximum value of the y axis
max_matrixW <- as.numeric(max(matrixW_melt$value))
|
|
306
|
|
307
|
|
308 # Base plot
|
|
# Tick positions and two-line labels of the x axis (sequence context)
context_breaks <- c("A_A","A_C","A_G","A_T", "C_A","C_C","C_G","C_T",
                    "G_A","G_C","G_G","G_T", "T_A","T_C","T_G","T_T")
context_labels <- c('A\nA',"\nC","\nG","\nT", 'C\nA',"\nC","\nG","\nT",
                    'G\nA',"\nC","\nG","\nT", 'T\nA',"\nC","\nG","\nT")

# Composition of the signatures: one bar per sequence context, one facet
# column per mutation type and one facet row per signature.
p <- ggplot(matrixW_melt, aes(x=context, y=value, fill=alteration)) +
  geom_bar(stat="identity", width=0.5) +
  facet_grid(Signature ~ alteration, scales="free_y") +
  # One colour per mutation type, legend hidden
  scale_fill_manual(values=c("blue", "black", "red", "#828282", "#00CC33", "pink")) +
  guides(fill=FALSE) +
  # Customized theme (no background, no facet grid and strip, y axis only)
  mytheme +
  # No title on the x facet strip, no x axis ticks or title
  theme(strip.text.x=element_blank()) +
  theme(axis.title.x=element_blank(), axis.ticks.x = element_blank(), axis.title.y=element_text(size=5)) +
  ylab("% contribution to signatures") +
  # y axis capped at the maximum of matrix W, labelled at 0 and at that maximum
  scale_y_continuous(limits=c(0,max_matrixW), oob=squish, breaks=c(0,round(max_matrixW))) +
  # Extra top margin for the mutation type labels added when the figure is saved
  theme(plot.margin=unit(c(.3, 0, 0, 0), "cm")) +
  scale_x_discrete(breaks = context_breaks, labels = context_labels)
|
|
330
|
|
331
|
|
332 #------------------------------------------------------------------------------------------------------------------------------
|
|
333 # Change the color of the facets for the mutation type
|
|
334 #------------------------------------------------------------------------------------------------------------------------------
|
|
# Facet strip colours, in the order the strips are encountered
cols <- c("blue", "black", "red", "#828282", "#00CC33", "pink")

# Work on the grob representation of the plot
Pg <- ggplotGrob(p)
# Number of strip grobs seen so far; also the index of the colour to use
# NOTE(review): assumes no more strips than colours -- confirm for plots with
# many signatures.
idx <- 0
for( g in seq_along(Pg$grobs) )
{
  # Only the strip grobs are recoloured
  if( !grepl( "strip.absoluteGrob" , Pg$grobs[[g]]$name ) ) next

  idx <- idx + 1
  sb <- grep( "strip\\.background" , names( Pg$grobs[[g]]$children ) )
  Pg$grobs[[g]]$children[[sb]][]$gp$fill <- cols[idx]
}

# Reduce the size of the facet strip
Pg$heights[[3]] <- unit(.05,"cm")
|
|
354
|
|
355
|
|
356 #------------------------------------------------------------------------------------------------------------------------------
|
|
357 # Save the graph in a png file
|
|
358 #------------------------------------------------------------------------------------------------------------------------------
|
|
# Write the composition plot to a PNG file
options(bitmapType='cairo')
png(figure_matrixW_png, width=1300, height=500, res=300, pointsize = 4)
plot(Pg)

# Label each mutation type above its facet strip
strip_label_y <- unit(1,"npc") - unit(1.4,"line")
strip_label_x <- c(0.12, 0.27, 0.42, 0.58, 0.74, 0.89)
strip_label   <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")
for( k in seq_along(strip_label) )
{
  grid.text(strip_label_x[k], strip_label_y, label=strip_label[k])
}
invisible( dev.off() )
|
|
370
|
|
371
|
|
372
|
|
373 ###############################################################################
|
|
374 # Contribution of mutational signature in each samples #
|
|
375 ###############################################################################
|
|
376
|
|
377 # Calculate the variability expain by the model (evar)
|
|
# Share of the variability explained by the model (evar), as a percentage
rss <- rss(res, matrixNMF)
varTot <- sum(matrixNMF^2)
evar <- 1 - rss / varTot
evar_round <- round(evar, digits=3) * 100

# Outside Galaxy (no HTML report) the value is printed on the console
if(is.null(opt$html))
{
  cat("\n", evar_round, "% of the variance is explained with", opt$nbSignature, "signatures\n\n")
}

# Total number of SBS per sample
NbSBS <- colSums(matrixNMF)

# Normalize every sample (column of H) so that it sums to 100%
matrixH_norm <- t((t(matrixH)/colSums(matrixH))*100)
# Default row names (row1, row2, ...) when there are none, then letters
rownames(matrixH_norm) <- rownames(matrixH_norm, do.NULL = FALSE, prefix = "row")
rownames(matrixH_norm) <- sapply(rownames(matrixH_norm), ConvertNb2Aphabet, USE.NAMES = FALSE)

# Long format, with the total number of SBS of the sample repeated on each of
# its signature rows
matrixH_norm_melt <- melt(matrixH_norm)
matrixH_norm_melt <- cbind(matrixH_norm_melt, rep(NbSBS, each = opt$nbSignature))
colnames(matrixH_norm_melt) <- c("Signature", "Sample", "Percent_Contri", "Total_SBS")

# Contribution expressed as a number of SBS
matrixH_norm_melt$CountSBS_Contri <- Contri2SignSBS(matrixH_norm_melt$Total_SBS, matrixH_norm_melt$Percent_Contri)

# Save the table
write.table(matrixH_norm_melt, file=output_matrixH_ggplot2, quote=FALSE, sep="\t", col.names=TRUE, row.names=FALSE)
|
|
408
|
|
409 # Base plot for the contribution of each samples according the count of mutations
|
|
# Sample names are only printed (vertically) for up to 35 samples
sample_labels <- if(ncol(matrixNMF) <= 35) element_text(angle=90) else element_blank()

# Contribution of each sample as a number of mutations: reversed y axis,
# no x axis line
p2 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -CountSBS_Contri), y=CountSBS_Contri, fill=Signature)) +
  geom_bar(stat="identity") +
  theme_classic() +
  scale_y_reverse() +
  ylab("Number of mutations") + xlab("Samples") +
  theme(axis.line.x=element_blank()) +
  theme(axis.text.x = sample_labels)

# Contribution of each sample in percentages: no x axis text, line or ticks
p3 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -CountSBS_Contri), y=Percent_Contri, fill=Signature)) +
  geom_bar(stat="identity") +
  theme_classic() +
  theme(axis.text.x = element_blank()) +
  xlab("") + ylab("% of mutations") +
  theme(axis.line.x=element_blank(), axis.ticks.x=element_blank())

# Both plots in one PNG (percentages on top), sharing a single legend
png(figure_matrixH_png, width=3000, height=2000, res=300)
suppressWarnings( grid_arrange_shared_legend(p3, p2) )
invisible( dev.off() )
|
|
436
|
|
437
|
|
438 ###############################################################################
|
|
439 # Average contributions of each signature in each cluster #
|
|
440 ###############################################################################
|
|
441
|
|
# One row per sample: its cluster followed by its normalized contributions
matrixH_cluster <- cbind(matrix_cluster[,1], t(matrixH_norm))
colnames(matrixH_cluster) <- c("Cluster", colnames(t(matrixH_norm)))

df <- as.data.frame(matrixH_cluster)

# Clusters are assumed to be numbered 1..nbSignature
cluster_ids <- seq_len(opt$nbSignature)

# Mean contribution of every signature within every cluster
# (signatures in rows, clusters in columns, then transposed)
tmp_mat <- sapply(cluster_ids, function(x) { meanCluster(df[df[,1] == x,]) } )
rownames(tmp_mat) <- paste0("Sig. ", cluster_ids)
colnames(tmp_mat) <- paste0("Cluster ", cluster_ids)
tmp_mat <- t(tmp_mat)

# Number of samples in each cluster, counted directly rather than by
# converting dim() to text and back
nbSampleByCluster <- vapply(cluster_ids, function(x) { sum(df[,1] == x) }, integer(1))

# Combine the average contributions with the number of samples
tmp_mat <- cbind(tmp_mat, nbSampleByCluster)
colnames(tmp_mat)[opt$nbSignature+1] <- "Number of samples"

# Save the table
write.table(tmp_mat, file=output_matrixH_cluster, quote=FALSE, sep="\t", col.names=TRUE, row.names=TRUE)
|
|
460
|
|
461 ## Create an image of the table with ggplot2
|
|
462 # Dummy plot
|
|
## Create an image of the table with ggplot2
# Theme that hides every element of the dummy plot
table_theme <- theme(
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_rect(fill=NA,color="white", size=0.5, linetype="solid"),
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  panel.background = element_rect(fill="white"),
  plot.background = element_rect(fill="white"),
  legend.position = "none",
  axis.text = element_blank(),
  axis.title = element_blank()
)

# Blank plot used as a canvas, with the table drawn on top of it
p4 <- qplot(1:10, 1:10, geom = "blank") + table_theme
p4 <- p4 + annotation_custom(grob = tableGrob(tmp_mat), xmin = 4, xmax = 7, ymin = 0, ymax = 10)

# Save the table as a PNG
png(figure_matrixH_cluster, width=2500, height=1000, res=300)
plot(p4)
invisible( dev.off() )

# Delete the empty plot created by the script
if (file.exists("Rplots.pdf"))
{
  invisible( file.remove("Rplots.pdf") )
}
|
|
489
|
|
490
|
|
491
|
|
492 ###############################################################################
|
|
493 # Create HTML output for Galaxy #
|
|
494 ###############################################################################
|
|
# Galaxy report: zip the NMF directory and write an HTML page linking to the
# result files and figures. Only done when --html was given.
if(! is.null(opt$html))
{
  # Galaxy doesn't need the full path to the files: the links are relative to
  # the output directory
  output_cluster_html         <- "NMF/Files/Cluster_MixtureCoeff.txt"
  figure_cluster_html         <- "NMF/Figures/Heatmap_MixtureCoeff.png"
  output_matrixW_html         <- "NMF/Files/MatrixW-Normto100.txt"
  output_matrixH_ggplot2_html <- "NMF/Files/MatrixH-Inputggplot2.txt"
  output_matrixH_cluster_html <- "NMF/Files/Average_ContriByCluster.txt"
  figure_matrixW_png_html     <- "NMF/Figures/CompositionSomaticMutation.png"
  figure_matrixH_png_html     <- "NMF/Figures/ContributionMutationSignature.png"
  figure_matrixH_cluster_html <- "NMF/Figures/Average_ContriByCluster.png"

  # Checked before changing directory: figure_cluster may be relative to the
  # directory the script was started from.
  heatmap_available <- file.exists(figure_cluster)

  # Attribute values are quoted: with an unquoted value the "/" of a trailing
  # "/>" becomes part of the URL.
  html_file_link <- function(href, label)
  {
    paste0("<a href=\"", href, "\">", label, "</a>")
  }
  html_image_link <- function(src, width = NULL)
  {
    width_attr <- if(is.null(width)) "" else paste0(" width=\"", width, "\"")
    paste0("<a href=\"", src, "\"><img", width_attr, " src=\"", src, "\"/></a>")
  }

  #### Create an archive with all the results
  # The working directory stays on the output directory afterwards, so a
  # relative --html path is resolved against it.
  setwd(opt$output)
  system("zip -r NMF.zip NMF")

  heatmap_cell <- if(heatmap_available)
  {
    paste0("<td> <center> ", html_image_link(figure_cluster_html), " </center> </td>")
  } else
  {
    "<td>WARNING: NMF package can't plot the heatmap when the samples size is above 300. <br/></td>"
  }

  html_lines <- c(
    "<html><body>",
    "<center> <h2> NMF Mutational signatures analysis </h2> </center>",
    "<br/> Download the results",
    paste0("<br/>", html_file_link("NMF.zip", "NMF.zip"), "<br/>"),

    #### Heatmap
    "<br/>",
    "<table>",
    "<tr> <th><h3>Heatmap of the mixture coefficient matrix</h3></th> </tr>",
    paste0("<tr> <td> <center> <br/> ", html_file_link(output_cluster_html, "Cluster_MixtureCoeff.txt"), " </center> </td> </tr>"),
    "<tr>",
    heatmap_cell,
    "</tr>",
    "</table>",

    ### Signature composition
    "<br/><br/>",
    "<table>",
    "<tr>",
    "<th><h3>Signature composition</h3></th>",
    "</tr>",
    paste0("<tr><td>", evar_round, "% of the variance is explained with ", opt$nbSignature, " signatures", "</td></tr>"),
    "<tr height=\"15\"></tr>",
    paste0("<tr><td> <center> ", html_file_link(output_matrixW_html, "Composition somatic mutation (input matrix for the tool MutSpec-Compare)"), " </center></td></tr>"),
    "<tr>",
    paste0("<td>", html_image_link(figure_matrixW_png_html, 1000), "</td>"),
    "</tr>",
    "</table>",
    "<br/><br/>",

    ### Sample contribution to signatures
    "<table>",
    "<tr>",
    "<th><h3>Sample contribution to signatures</h3></th>",
    "</tr>",
    paste0("<tr><td> <center> ", html_file_link(output_matrixH_ggplot2_html, "Contribution mutation signature matrix"), " </center></td></tr>"),
    "<tr>",
    paste0("<td>", html_image_link(figure_matrixH_png_html, 700), "</td>"),
    "</tr>",
    "</table>",
    "<br/><br/>",

    ### Average contributions of each signatures in each cluster
    "<table>",
    "<tr>",
    "<th><h3>Average contributions of each signatures in each cluster</h3></th>",
    "</tr>",
    paste0("<tr><td> <center> ", html_file_link(output_matrixH_cluster_html, "Average contributions"), " </center></td></tr>"),
    "<tr>",
    paste0("<td>", html_image_link(figure_matrixH_cluster_html, 700), "</td>"),
    "</tr>",
    "</table>",
    "<br/><br/>",

    "<br/><br/><br/><br/>",
    "</body></html>"
  )

  # One write for the whole page, one element per line
  writeLines(html_lines, con = opt$html)
}
|