comparison R/somaticSignature_Galaxy.r @ 7:eda59b985b1c draft default tip

Uploaded
author iarc
date Mon, 13 Mar 2017 08:21:19 -0400
parents 46a10309dfe2
children
comparison
equal deleted inserted replaced
6:46a10309dfe2 7:eda59b985b1c
1 #!/usr/bin/Rscript 1 #!/usr/bin/Rscript
2 2
3 #-----------------------------------# 3 #-----------------------------------#
4 # Author: Maude # 4 # Author: Maude #
5 # Script: somaticSignature_Galaxy.r # 5 # Script: somaticSignature_Galaxy.r #
6 # Last update: 29/07/15 # 6 # Last update: 17/02/17 #
7 #-----------------------------------# 7 #-----------------------------------#
8 8
9 9
10 ######################################################################################################################################### 10 #########################################################################################################################################
11 # Run NMF algorithm and represent the composition of somatic signatures and the contribution in each samples # 11 # Run NMF algorithm and represent the composition of somatic signatures and the contribution in each samples #
23 spec = matrix(c( 23 spec = matrix(c(
24 "input" , "i", 1, "character", 24 "input" , "i", 1, "character",
25 "nbSignature", "nbSign", 1, "integer", 25 "nbSignature", "nbSign", 1, "integer",
26 "cpu", "cpu", 1, "integer", 26 "cpu", "cpu", 1, "integer",
27 "output", "o", 1, "character", 27 "output", "o", 1, "character",
28 "html", "html", 0, "character",
28 "help", "h", 0, "logical" 29 "help", "h", 0, "logical"
29 ), 30 ),
30 byrow=TRUE, ncol=4 31 byrow=TRUE, ncol=4
31 ) 32 )
32 33
33 opt = getopt(spec); 34 opt = getopt(spec);
34 35
35 # No argument is pass to the command line 36 # No argument is pass to the command line
36 if(length(opt) == 1) 37 if(length(opt) == 1)
37 { 38 {
38 cat(paste("Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir>\n",sep="")) 39 cat(paste("Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir> --html <html_for_Galaxy>\n",sep=""))
40
41 cat(paste0("\n--input Input matrix created with the tool MutSpec-Stat\n--nbSignature Number of signatures to extract (min = 2)\n--cpu Number of CPUs\n--output Output directory\n--html Path to HTML page (ONLY FOR GALAXY WRAPPER)\n"))
42
39 q(status=1) 43 q(status=1)
40 } 44 }
41 45
42 # Help was asked for. 46 # Help was asked for.
43 if ( !is.null(opt$help) ) 47 if ( !is.null(opt$help) )
44 { 48 {
45 # print a friendly message and exit with a non-zero error code 49 # print a friendly message and exit with a non-zero error code
46 cat(paste("Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir>\n",sep="")) 50 cat(paste("Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir> --html <html_for_Galaxy>\n",sep=""))
47 q(status=1) 51 q(status=1)
48 } 52 }
49 53
50 54
51 55
199 203
200 204
201 ############################################################################### 205 ###############################################################################
202 # Run NMF # 206 # Run NMF #
203 ############################################################################### 207 ###############################################################################
204 208 # Create outdir
209 dir.create(opt$output)
205 # Create the output directories 210 # Create the output directories
206 output_NMF <- paste0(opt$output, "/NMF") 211 output_NMF <- paste0(opt$output, "/NMF")
207 dir.create(output_NMF) 212 dir.create(output_NMF)
208 output_Figures <- paste0(output_NMF, "/", "Figures") 213 output_Figures <- paste0(output_NMF, "/", "Figures")
209 dir.create(output_Figures) 214 dir.create(output_Figures)
367 372
368 ############################################################################### 373 ###############################################################################
369 # Contribution of mutational signature in each samples # 374 # Contribution of mutational signature in each samples #
370 ############################################################################### 375 ###############################################################################
371 376
377 # Calculate the variability expain by the model (evar)
378 rss <- rss(res, matrixNMF)
379 varTot <- sum(matrixNMF^2)
380 evar <- 1 - rss / varTot
381 evar_round <- round(evar, digits=3) * 100
382
383 if(is.null(opt$html))
384 {
385 cat("\n", evar_round, "% of the variance is explained with", opt$nbSignature, "signatures\n\n")
386 }
387
372 # Recover the total number of SBS per samples 388 # Recover the total number of SBS per samples
373 NbSBS <- colSums(matrixNMF) 389 NbSBS <- colSums(matrixNMF)
374 # Normalized matrix H to 100% 390 # Normalized matrix H to 100%
375 matrixH_norm <- t((t(matrixH)/colSums(matrixH))*100) 391 matrixH_norm <- t((t(matrixH)/colSums(matrixH))*100)
376 # Add a row name 392 # Add a row name
379 rownames(matrixH_norm) <- sapply(1:length(rownames(matrixH_norm)), function(x) { ConvertNb2Aphabet(rownames(matrixH_norm)[x]) } ) 395 rownames(matrixH_norm) <- sapply(1:length(rownames(matrixH_norm)), function(x) { ConvertNb2Aphabet(rownames(matrixH_norm)[x]) } )
380 396
381 ## Combined the contribution with the total number of SBS 397 ## Combined the contribution with the total number of SBS
382 matrixH_norm_melt <- melt(matrixH_norm) 398 matrixH_norm_melt <- melt(matrixH_norm)
383 matrixH_norm_melt <- cbind(matrixH_norm_melt, rep(NbSBS, each = opt$nbSignature)) 399 matrixH_norm_melt <- cbind(matrixH_norm_melt, rep(NbSBS, each = opt$nbSignature))
384 colnames(matrixH_norm_melt) <- c("Signature", "Sample", "Value", "Total_SBS") 400 colnames(matrixH_norm_melt) <- c("Signature", "Sample", "Percent_Contri", "Total_SBS")
385 401
386 # Calculate the contribution in number of SBS 402 # Calculate the contribution in number of SBS
387 matrixH_norm_melt$ContriSBS <- sapply(1:nrow(matrixH_norm_melt), function(x) { Contri2SignSBS(matrixH_norm_melt$Total_SBS[x], matrixH_norm_melt$Value[x]) } ) 403 matrixH_norm_melt$ContriSBS <- sapply(1:nrow(matrixH_norm_melt), function(x) { Contri2SignSBS(matrixH_norm_melt$Total_SBS[x], matrixH_norm_melt$Percent_Contri[x]) } )
388 404 colnames(matrixH_norm_melt) <- c("Signature", "Sample", "Percent_Contri", "Total_SBS", "CountSBS_Contri")
389 405
390 # Save the matrix 406 # Save the matrix
391 write.table(matrixH_norm_melt, file=output_matrixH_ggplot2, quote=F, sep="\t", col.names=T, row.names=F) 407 write.table(matrixH_norm_melt, file=output_matrixH_ggplot2, quote=F, sep="\t", col.names=T, row.names=F)
392 408
393 # Base plot for the contribution of each samples according the count of mutations 409 # Base plot for the contribution of each samples according the count of mutations
394 p2 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -ContriSBS), y=ContriSBS, fill=Signature)) + geom_bar(stat="identity") + theme_classic() 410 p2 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -CountSBS_Contri), y=CountSBS_Contri, fill=Signature)) + geom_bar(stat="identity") + theme_classic()
395 # Remove the name of samples
396 p2 <- p2 + theme(axis.text.x = element_blank())
397 # Reverse the y axis 411 # Reverse the y axis
398 p2 <- p2 + scale_y_reverse() 412 p2 <- p2 + scale_y_reverse()
399 # Rename the y and x axis 413 # Rename the y and x axis
400 p2 <- p2 + ylab("Number of mutations") + xlab("Samples") 414 p2 <- p2 + ylab("Number of mutations") + xlab("Samples")
401 # Remove the x axis line 415 # Remove the x axis line
402 p2 <- p2 + theme(axis.line.x=element_blank()) 416 p2 <- p2 + theme(axis.line.x=element_blank())
417 # Add sample names
418 if(ncol(matrixNMF) <= 35)
419 {
420 p2 <- p2 + theme(axis.text.x = element_text(angle=90))
421 } else
422 {
423 p2 <- p2 + theme(axis.text.x = element_blank())
424 }
403 425
404 # Base plot for the contribution of each samples in percentages 426 # Base plot for the contribution of each samples in percentages
405 p3 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -ContriSBS), y=Value, fill=Signature)) + geom_bar(stat="identity") + theme_classic() + theme(axis.text.x = element_blank()) + xlab("") + ylab("% of mutations") 427 p3 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -CountSBS_Contri), y=Percent_Contri, fill=Signature)) + geom_bar(stat="identity") + theme_classic() + theme(axis.text.x = element_blank()) + xlab("") + ylab("% of mutations")
406 # Remove the x axis line 428 # Remove the x axis line
407 p3 <- p3 + theme(axis.line.x=element_blank(), axis.ticks.x=element_blank()) 429 p3 <- p3 + theme(axis.line.x=element_blank(), axis.ticks.x=element_blank())
408
409 430
410 # Plot PNG 431 # Plot PNG
411 png(figure_matrixH_png, width=3000, heigh=2000, res=300) 432 png(figure_matrixH_png, width=3000, heigh=2000, res=300)
412 # Combined the two plots for the contribution of the samples 433 # Combined the two plots for the contribution of the samples
413 suppressWarnings( grid_arrange_shared_legend(p3, p2) ) 434 suppressWarnings( grid_arrange_shared_legend(p3, p2) )
437 # Save the matrix 458 # Save the matrix
438 write.table(tmp_mat, file=output_matrixH_cluster, quote=F, sep="\t", col.names=T, row.names=T) 459 write.table(tmp_mat, file=output_matrixH_cluster, quote=F, sep="\t", col.names=T, row.names=T)
439 460
440 ## Create an image of the table with ggplot2 461 ## Create an image of the table with ggplot2
441 # Dummy plot 462 # Dummy plot
442 p4 <- qplot(1:10, 1:10, geom = "blank") + 463 p4 <- qplot(1:10, 1:10, geom = "blank") +
443 theme(panel.grid.major = element_blank(), 464 theme(panel.grid.major = element_blank(),
444 panel.grid.minor = element_blank(), 465 panel.grid.minor = element_blank(),
445 panel.border = element_rect(fill=NA,color="white", size=0.5, linetype="solid"), 466 panel.border = element_rect(fill=NA,color="white", size=0.5, linetype="solid"),
446 axis.line = element_blank(), 467 axis.line = element_blank(),
447 axis.ticks = element_blank(), 468 axis.ticks = element_blank(),
448 panel.background = element_rect(fill="white"), 469 panel.background = element_rect(fill="white"),
449 plot.background = element_rect(fill="white"), 470 plot.background = element_rect(fill="white"),
450 legend.position = "none", 471 legend.position = "none",
451 axis.text = element_blank(), 472 axis.text = element_blank(),
452 axis.title = element_blank() 473 axis.title = element_blank()
453 ) 474 )
454 # Adding a table 475 # Adding a table
455 p4 <- p4 + annotation_custom(grob = tableGrob(tmp_mat), 476 p4 <- p4 + annotation_custom(grob = tableGrob(tmp_mat),
463 invisible( dev.off() ) 484 invisible( dev.off() )
464 485
465 486
466 # Delete the empty plot created by the script 487 # Delete the empty plot created by the script
467 if (file.exists("Rplots.pdf")) invisible( file.remove("Rplots.pdf") ) 488 if (file.exists("Rplots.pdf")) invisible( file.remove("Rplots.pdf") )
489
490
491
492 ###############################################################################
493 # Create HTML output for Galaxy #
494 ###############################################################################
495 if(! is.null(opt$html))
496 {
497 # Galaxy doesn't need the full path to the files so redefine the output filenames
498 output_cluster_html <- paste0("NMF/Files/Cluster_MixtureCoeff.txt")
499 figure_cluster_html <- paste0("NMF/Figures/Heatmap_MixtureCoeff.png")
500 output_matrixW_html <- paste0("NMF/Files/MatrixW-Normto100.txt")
501 output_matrixH_ggplot2_html <- paste0("NMF/Files/MatrixH-Inputggplot2.txt")
502 output_matrixH_cluster_html <- paste0("NMF/Files/Average_ContriByCluster.txt")
503 figure_matrixW_png_html <- paste0("NMF/Figures/CompositionSomaticMutation.png")
504 figure_matrixH_png_html <- paste0("NMF/Figures/ContributionMutationSignature.png")
505 figure_matrixH_cluster_html <- paste0("NMF/Figures/Average_ContriByCluster.png")
506
507
508 #### Create an archive with all the results
509 setwd(opt$output)
510 # zip("NMF.tar.gz", "NMF")
511 system("zip -r NMF.zip NMF")
512
513 write("<html><body>", file=opt$html)
514 write("<center> <h2> NMF Mutational signatures analysis </h2> </center>", file=opt$html, append=TRUE)
515
516 write("<br/> Download the results", file=opt$html, append=TRUE)
517 write("<br/><a href=NMF.zip>NMF.zip</a><br/>", file=opt$html, append=TRUE)
518
519 #### Heatmap
520 write("<table>", file=opt$html, append=TRUE)
521 write("<tr> <br/> <th><h3>Heatmap of the mixture coefficient matrix</h3></th> </tr>", file=opt$html, append=TRUE)
522 write(paste0("<tr> <td> <center> <br/> <a href=", output_cluster_html, ">Cluster_MixtureCoeff.txt</a> </center> </td> </tr>"), file=opt$html, append=TRUE)
523 write("<tr>", file=opt$html, append=TRUE)
524
525 if(!file.exists(figure_cluster))
526 {
527 write("WARNING: NMF package can't plot the heatmap when the samples size is above 300. <br/>", file=opt$html, append=TRUE)
528 }else{
529 write(paste0("<td> <center> <a href=", figure_cluster_html, ">"), file=opt$html, append=TRUE)
530 write(paste0("<img src=", figure_cluster_html, "/></a> <center> </td>"), file=opt$html, append=TRUE)
531 }
532 write("</tr>", file=opt$html, append=TRUE)
533 write("</table>", file=opt$html, append=TRUE)
534
535 ### Signature composition
536 write("<br/><br/>", file=opt$html, append=TRUE)
537 write("<table>", file=opt$html, append=TRUE)
538 write("<tr>", file=opt$html, append=TRUE)
539 write("<th><h3>Signature composition</h3></th>", file=opt$html, append=TRUE)
540 write("</tr>", file=opt$html, append=TRUE)
541 write(paste0("<tr><td>", evar_round, "% of the variance is explained with ", opt$nbSignature, " signatures", "</td></tr>"), file=opt$html, append=TRUE)
542 write("<tr height=15></tr>", file=opt$html, append=TRUE)
543 write(paste0("<tr><td> <center> <a href=", output_matrixW_html ,">Composition somatic mutation (input matrix for the tool MutSpec-Compare)</a><center></td></tr>"), file=opt$html, append=TRUE)
544 write("<tr>", file=opt$html, append=TRUE)
545 write(paste0("<td><a href=", figure_matrixW_png_html, ">"), file=opt$html, append=TRUE)
546 write(paste0("<img width=1000 src=", figure_matrixW_png_html, "/></a></td>"), file=opt$html, append=TRUE)
547 write("</tr> ", file=opt$html, append=TRUE)
548 write("</table>", file=opt$html, append=TRUE)
549 write("<br/><br/>", file=opt$html, append=TRUE)
550
551 ### Sample contribution to signatures
552 write("<table>", file=opt$html, append=TRUE)
553 write("<tr>", file=opt$html, append=TRUE)
554 write("<th><h3>Sample contribution to signatures</h3></th>", file=opt$html, append=TRUE)
555 write("</tr>", file=opt$html, append=TRUE)
556 write(paste0("<tr><td> <center> <a href=", output_matrixH_ggplot2_html, ">Contribution mutation signature matrix</a></center></td></tr>"), file=opt$html, append=TRUE)
557 write("<tr>", file=opt$html, append=TRUE)
558 write(paste0("<td><a href=", figure_matrixH_png_html, ">"), file=opt$html, append=TRUE)
559 write(paste0("<img width=700 src=", figure_matrixH_png_html, "/></a></td>"), file=opt$html, append=TRUE)
560 write("</tr>", file=opt$html, append=TRUE)
561 write("</table>", file=opt$html, append=TRUE)
562 write("<br/><br/>", file=opt$html, append=TRUE)
563
564 ### Average contributions of each signatures in each cluster
565 write("<table>", file=opt$html, append=TRUE)
566 write("<tr>", file=opt$html, append=TRUE)
567 write("<th><h3>Average contributions of each signatures in each cluster</h3></th>", file=opt$html, append=TRUE)
568 write(paste0("<tr><td> <center> <a href=", output_matrixH_cluster_html, ">Average contributions</a></center></td></tr>"), file=opt$html, append=TRUE)
569 write("<tr>", file=opt$html, append=TRUE)
570 write(paste0("<td><a href=", figure_matrixH_cluster_html, ">"), file=opt$html, append=TRUE)
571 write(paste0("<img width=700 src=", figure_matrixH_cluster_html, "/></a></td>"), file=opt$html, append=TRUE)
572 write("</tr> ", file=opt$html, append=TRUE)
573 write("</table>", file=opt$html, append=TRUE)
574 write("<br/><br/>", file=opt$html, append=TRUE)
575
576 write("<br/><br/><br/><br/>", file=opt$html, append=TRUE)
577 }