Mercurial > repos > iarc > mutspec
comparison R/somaticSignature_Galaxy.r @ 7:eda59b985b1c draft default tip
Uploaded
author | iarc |
---|---|
date | Mon, 13 Mar 2017 08:21:19 -0400 |
parents | 46a10309dfe2 |
children |
comparison
equal
deleted
inserted
replaced
6:46a10309dfe2 | 7:eda59b985b1c |
---|---|
1 #!/usr/bin/Rscript | 1 #!/usr/bin/Rscript |
2 | 2 |
3 #-----------------------------------# | 3 #-----------------------------------# |
4 # Author: Maude # | 4 # Author: Maude # |
5 # Script: somaticSignature_Galaxy.r # | 5 # Script: somaticSignature_Galaxy.r # |
6 # Last update: 29/07/15 # | 6 # Last update: 17/02/17 # |
7 #-----------------------------------# | 7 #-----------------------------------# |
8 | 8 |
9 | 9 |
10 ######################################################################################################################################### | 10 ######################################################################################################################################### |
11 # Run NMF algorithm and represent the composition of somatic signatures and the contribution in each samples # | 11 # Run NMF algorithm and represent the composition of somatic signatures and the contribution in each samples # |
23 spec = matrix(c( | 23 spec = matrix(c( |
24 "input" , "i", 1, "character", | 24 "input" , "i", 1, "character", |
25 "nbSignature", "nbSign", 1, "integer", | 25 "nbSignature", "nbSign", 1, "integer", |
26 "cpu", "cpu", 1, "integer", | 26 "cpu", "cpu", 1, "integer", |
27 "output", "o", 1, "character", | 27 "output", "o", 1, "character", |
28 "html", "html", 0, "character", | |
28 "help", "h", 0, "logical" | 29 "help", "h", 0, "logical" |
29 ), | 30 ), |
30 byrow=TRUE, ncol=4 | 31 byrow=TRUE, ncol=4 |
31 ) | 32 ) |
32 | 33 |
33 opt = getopt(spec); | 34 opt = getopt(spec); |
34 | 35 |
35 # No argument is pass to the command line | 36 # No argument is pass to the command line |
36 if(length(opt) == 1) | 37 if(length(opt) == 1) |
37 { | 38 { |
38 cat(paste("Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir>\n",sep="")) | 39 cat(paste("Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir> --html <html_for_Galaxy>\n",sep="")) |
40 | |
41 cat(paste0("\n--input Input matrix created with the tool MutSpec-Stat\n--nbSignature Number of signatures to extract (min = 2)\n--cpu Number of CPUs\n--output Output directory\n--html Path to HTML page (ONLY FOR GALAXY WRAPPER)\n")) | |
42 | |
39 q(status=1) | 43 q(status=1) |
40 } | 44 } |
41 | 45 |
42 # Help was asked for. | 46 # Help was asked for. |
43 if ( !is.null(opt$help) ) | 47 if ( !is.null(opt$help) ) |
44 { | 48 { |
45 # print a friendly message and exit with a non-zero error code | 49 # print a friendly message and exit with a non-zero error code |
46 cat(paste("Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir>\n",sep="")) | 50 cat(paste("Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir> --html <html_for_Galaxy>\n",sep="")) |
47 q(status=1) | 51 q(status=1) |
48 } | 52 } |
49 | 53 |
50 | 54 |
51 | 55 |
199 | 203 |
200 | 204 |
201 ############################################################################### | 205 ############################################################################### |
202 # Run NMF # | 206 # Run NMF # |
203 ############################################################################### | 207 ############################################################################### |
204 | 208 # Create outdir |
209 dir.create(opt$output) | |
205 # Create the output directories | 210 # Create the output directories |
206 output_NMF <- paste0(opt$output, "/NMF") | 211 output_NMF <- paste0(opt$output, "/NMF") |
207 dir.create(output_NMF) | 212 dir.create(output_NMF) |
208 output_Figures <- paste0(output_NMF, "/", "Figures") | 213 output_Figures <- paste0(output_NMF, "/", "Figures") |
209 dir.create(output_Figures) | 214 dir.create(output_Figures) |
367 | 372 |
368 ############################################################################### | 373 ############################################################################### |
369 # Contribution of mutational signature in each samples # | 374 # Contribution of mutational signature in each samples # |
370 ############################################################################### | 375 ############################################################################### |
371 | 376 |
377 # Calculate the variability expain by the model (evar) | |
378 rss <- rss(res, matrixNMF) | |
379 varTot <- sum(matrixNMF^2) | |
380 evar <- 1 - rss / varTot | |
381 evar_round <- round(evar, digits=3) * 100 | |
382 | |
383 if(is.null(opt$html)) | |
384 { | |
385 cat("\n", evar_round, "% of the variance is explained with", opt$nbSignature, "signatures\n\n") | |
386 } | |
387 | |
372 # Recover the total number of SBS per samples | 388 # Recover the total number of SBS per samples |
373 NbSBS <- colSums(matrixNMF) | 389 NbSBS <- colSums(matrixNMF) |
374 # Normalized matrix H to 100% | 390 # Normalized matrix H to 100% |
375 matrixH_norm <- t((t(matrixH)/colSums(matrixH))*100) | 391 matrixH_norm <- t((t(matrixH)/colSums(matrixH))*100) |
376 # Add a row name | 392 # Add a row name |
379 rownames(matrixH_norm) <- sapply(1:length(rownames(matrixH_norm)), function(x) { ConvertNb2Aphabet(rownames(matrixH_norm)[x]) } ) | 395 rownames(matrixH_norm) <- sapply(1:length(rownames(matrixH_norm)), function(x) { ConvertNb2Aphabet(rownames(matrixH_norm)[x]) } ) |
380 | 396 |
381 ## Combined the contribution with the total number of SBS | 397 ## Combined the contribution with the total number of SBS |
382 matrixH_norm_melt <- melt(matrixH_norm) | 398 matrixH_norm_melt <- melt(matrixH_norm) |
383 matrixH_norm_melt <- cbind(matrixH_norm_melt, rep(NbSBS, each = opt$nbSignature)) | 399 matrixH_norm_melt <- cbind(matrixH_norm_melt, rep(NbSBS, each = opt$nbSignature)) |
384 colnames(matrixH_norm_melt) <- c("Signature", "Sample", "Value", "Total_SBS") | 400 colnames(matrixH_norm_melt) <- c("Signature", "Sample", "Percent_Contri", "Total_SBS") |
385 | 401 |
386 # Calculate the contribution in number of SBS | 402 # Calculate the contribution in number of SBS |
387 matrixH_norm_melt$ContriSBS <- sapply(1:nrow(matrixH_norm_melt), function(x) { Contri2SignSBS(matrixH_norm_melt$Total_SBS[x], matrixH_norm_melt$Value[x]) } ) | 403 matrixH_norm_melt$ContriSBS <- sapply(1:nrow(matrixH_norm_melt), function(x) { Contri2SignSBS(matrixH_norm_melt$Total_SBS[x], matrixH_norm_melt$Percent_Contri[x]) } ) |
388 | 404 colnames(matrixH_norm_melt) <- c("Signature", "Sample", "Percent_Contri", "Total_SBS", "CountSBS_Contri") |
389 | 405 |
390 # Save the matrix | 406 # Save the matrix |
391 write.table(matrixH_norm_melt, file=output_matrixH_ggplot2, quote=F, sep="\t", col.names=T, row.names=F) | 407 write.table(matrixH_norm_melt, file=output_matrixH_ggplot2, quote=F, sep="\t", col.names=T, row.names=F) |
392 | 408 |
393 # Base plot for the contribution of each samples according the count of mutations | 409 # Base plot for the contribution of each samples according the count of mutations |
394 p2 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -ContriSBS), y=ContriSBS, fill=Signature)) + geom_bar(stat="identity") + theme_classic() | 410 p2 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -CountSBS_Contri), y=CountSBS_Contri, fill=Signature)) + geom_bar(stat="identity") + theme_classic() |
395 # Remove the name of samples | |
396 p2 <- p2 + theme(axis.text.x = element_blank()) | |
397 # Reverse the y axis | 411 # Reverse the y axis |
398 p2 <- p2 + scale_y_reverse() | 412 p2 <- p2 + scale_y_reverse() |
399 # Rename the y and x axis | 413 # Rename the y and x axis |
400 p2 <- p2 + ylab("Number of mutations") + xlab("Samples") | 414 p2 <- p2 + ylab("Number of mutations") + xlab("Samples") |
401 # Remove the x axis line | 415 # Remove the x axis line |
402 p2 <- p2 + theme(axis.line.x=element_blank()) | 416 p2 <- p2 + theme(axis.line.x=element_blank()) |
417 # Add sample names | |
418 if(ncol(matrixNMF) <= 35) | |
419 { | |
420 p2 <- p2 + theme(axis.text.x = element_text(angle=90)) | |
421 } else | |
422 { | |
423 p2 <- p2 + theme(axis.text.x = element_blank()) | |
424 } | |
403 | 425 |
404 # Base plot for the contribution of each samples in percentages | 426 # Base plot for the contribution of each samples in percentages |
405 p3 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -ContriSBS), y=Value, fill=Signature)) + geom_bar(stat="identity") + theme_classic() + theme(axis.text.x = element_blank()) + xlab("") + ylab("% of mutations") | 427 p3 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -CountSBS_Contri), y=Percent_Contri, fill=Signature)) + geom_bar(stat="identity") + theme_classic() + theme(axis.text.x = element_blank()) + xlab("") + ylab("% of mutations") |
406 # Remove the x axis line | 428 # Remove the x axis line |
407 p3 <- p3 + theme(axis.line.x=element_blank(), axis.ticks.x=element_blank()) | 429 p3 <- p3 + theme(axis.line.x=element_blank(), axis.ticks.x=element_blank()) |
408 | |
409 | 430 |
410 # Plot PNG | 431 # Plot PNG |
411 png(figure_matrixH_png, width=3000, heigh=2000, res=300) | 432 png(figure_matrixH_png, width=3000, heigh=2000, res=300) |
412 # Combined the two plots for the contribution of the samples | 433 # Combined the two plots for the contribution of the samples |
413 suppressWarnings( grid_arrange_shared_legend(p3, p2) ) | 434 suppressWarnings( grid_arrange_shared_legend(p3, p2) ) |
437 # Save the matrix | 458 # Save the matrix |
438 write.table(tmp_mat, file=output_matrixH_cluster, quote=F, sep="\t", col.names=T, row.names=T) | 459 write.table(tmp_mat, file=output_matrixH_cluster, quote=F, sep="\t", col.names=T, row.names=T) |
439 | 460 |
440 ## Create an image of the table with ggplot2 | 461 ## Create an image of the table with ggplot2 |
441 # Dummy plot | 462 # Dummy plot |
442 p4 <- qplot(1:10, 1:10, geom = "blank") + | 463 p4 <- qplot(1:10, 1:10, geom = "blank") + |
443 theme(panel.grid.major = element_blank(), | 464 theme(panel.grid.major = element_blank(), |
444 panel.grid.minor = element_blank(), | 465 panel.grid.minor = element_blank(), |
445 panel.border = element_rect(fill=NA,color="white", size=0.5, linetype="solid"), | 466 panel.border = element_rect(fill=NA,color="white", size=0.5, linetype="solid"), |
446 axis.line = element_blank(), | 467 axis.line = element_blank(), |
447 axis.ticks = element_blank(), | 468 axis.ticks = element_blank(), |
448 panel.background = element_rect(fill="white"), | 469 panel.background = element_rect(fill="white"), |
449 plot.background = element_rect(fill="white"), | 470 plot.background = element_rect(fill="white"), |
450 legend.position = "none", | 471 legend.position = "none", |
451 axis.text = element_blank(), | 472 axis.text = element_blank(), |
452 axis.title = element_blank() | 473 axis.title = element_blank() |
453 ) | 474 ) |
454 # Adding a table | 475 # Adding a table |
455 p4 <- p4 + annotation_custom(grob = tableGrob(tmp_mat), | 476 p4 <- p4 + annotation_custom(grob = tableGrob(tmp_mat), |
463 invisible( dev.off() ) | 484 invisible( dev.off() ) |
464 | 485 |
465 | 486 |
466 # Delete the empty plot created by the script | 487 # Delete the empty plot created by the script |
467 if (file.exists("Rplots.pdf")) invisible( file.remove("Rplots.pdf") ) | 488 if (file.exists("Rplots.pdf")) invisible( file.remove("Rplots.pdf") ) |
489 | |
490 | |
491 | |
492 ############################################################################### | |
493 # Create HTML output for Galaxy # | |
494 ############################################################################### | |
495 if(! is.null(opt$html)) | |
496 { | |
497 # Galaxy doesn't need the full path to the files so redefine the output filenames | |
498 output_cluster_html <- paste0("NMF/Files/Cluster_MixtureCoeff.txt") | |
499 figure_cluster_html <- paste0("NMF/Figures/Heatmap_MixtureCoeff.png") | |
500 output_matrixW_html <- paste0("NMF/Files/MatrixW-Normto100.txt") | |
501 output_matrixH_ggplot2_html <- paste0("NMF/Files/MatrixH-Inputggplot2.txt") | |
502 output_matrixH_cluster_html <- paste0("NMF/Files/Average_ContriByCluster.txt") | |
503 figure_matrixW_png_html <- paste0("NMF/Figures/CompositionSomaticMutation.png") | |
504 figure_matrixH_png_html <- paste0("NMF/Figures/ContributionMutationSignature.png") | |
505 figure_matrixH_cluster_html <- paste0("NMF/Figures/Average_ContriByCluster.png") | |
506 | |
507 | |
508 #### Create an archive with all the results | |
509 setwd(opt$output) | |
510 # zip("NMF.tar.gz", "NMF") | |
511 system("zip -r NMF.zip NMF") | |
512 | |
513 write("<html><body>", file=opt$html) | |
514 write("<center> <h2> NMF Mutational signatures analysis </h2> </center>", file=opt$html, append=TRUE) | |
515 | |
516 write("<br/> Download the results", file=opt$html, append=TRUE) | |
517 write("<br/><a href=NMF.zip>NMF.zip</a><br/>", file=opt$html, append=TRUE) | |
518 | |
519 #### Heatmap | |
520 write("<table>", file=opt$html, append=TRUE) | |
521 write("<tr> <br/> <th><h3>Heatmap of the mixture coefficient matrix</h3></th> </tr>", file=opt$html, append=TRUE) | |
522 write(paste0("<tr> <td> <center> <br/> <a href=", output_cluster_html, ">Cluster_MixtureCoeff.txt</a> </center> </td> </tr>"), file=opt$html, append=TRUE) | |
523 write("<tr>", file=opt$html, append=TRUE) | |
524 | |
525 if(!file.exists(figure_cluster)) | |
526 { | |
527 write("WARNING: NMF package can't plot the heatmap when the samples size is above 300. <br/>", file=opt$html, append=TRUE) | |
528 }else{ | |
529 write(paste0("<td> <center> <a href=", figure_cluster_html, ">"), file=opt$html, append=TRUE) | |
530 write(paste0("<img src=", figure_cluster_html, "/></a> <center> </td>"), file=opt$html, append=TRUE) | |
531 } | |
532 write("</tr>", file=opt$html, append=TRUE) | |
533 write("</table>", file=opt$html, append=TRUE) | |
534 | |
535 ### Signature composition | |
536 write("<br/><br/>", file=opt$html, append=TRUE) | |
537 write("<table>", file=opt$html, append=TRUE) | |
538 write("<tr>", file=opt$html, append=TRUE) | |
539 write("<th><h3>Signature composition</h3></th>", file=opt$html, append=TRUE) | |
540 write("</tr>", file=opt$html, append=TRUE) | |
541 write(paste0("<tr><td>", evar_round, "% of the variance is explained with ", opt$nbSignature, " signatures", "</td></tr>"), file=opt$html, append=TRUE) | |
542 write("<tr height=15></tr>", file=opt$html, append=TRUE) | |
543 write(paste0("<tr><td> <center> <a href=", output_matrixW_html ,">Composition somatic mutation (input matrix for the tool MutSpec-Compare)</a><center></td></tr>"), file=opt$html, append=TRUE) | |
544 write("<tr>", file=opt$html, append=TRUE) | |
545 write(paste0("<td><a href=", figure_matrixW_png_html, ">"), file=opt$html, append=TRUE) | |
546 write(paste0("<img width=1000 src=", figure_matrixW_png_html, "/></a></td>"), file=opt$html, append=TRUE) | |
547 write("</tr> ", file=opt$html, append=TRUE) | |
548 write("</table>", file=opt$html, append=TRUE) | |
549 write("<br/><br/>", file=opt$html, append=TRUE) | |
550 | |
551 ### Sample contribution to signatures | |
552 write("<table>", file=opt$html, append=TRUE) | |
553 write("<tr>", file=opt$html, append=TRUE) | |
554 write("<th><h3>Sample contribution to signatures</h3></th>", file=opt$html, append=TRUE) | |
555 write("</tr>", file=opt$html, append=TRUE) | |
556 write(paste0("<tr><td> <center> <a href=", output_matrixH_ggplot2_html, ">Contribution mutation signature matrix</a></center></td></tr>"), file=opt$html, append=TRUE) | |
557 write("<tr>", file=opt$html, append=TRUE) | |
558 write(paste0("<td><a href=", figure_matrixH_png_html, ">"), file=opt$html, append=TRUE) | |
559 write(paste0("<img width=700 src=", figure_matrixH_png_html, "/></a></td>"), file=opt$html, append=TRUE) | |
560 write("</tr>", file=opt$html, append=TRUE) | |
561 write("</table>", file=opt$html, append=TRUE) | |
562 write("<br/><br/>", file=opt$html, append=TRUE) | |
563 | |
564 ### Average contributions of each signatures in each cluster | |
565 write("<table>", file=opt$html, append=TRUE) | |
566 write("<tr>", file=opt$html, append=TRUE) | |
567 write("<th><h3>Average contributions of each signatures in each cluster</h3></th>", file=opt$html, append=TRUE) | |
568 write(paste0("<tr><td> <center> <a href=", output_matrixH_cluster_html, ">Average contributions</a></center></td></tr>"), file=opt$html, append=TRUE) | |
569 write("<tr>", file=opt$html, append=TRUE) | |
570 write(paste0("<td><a href=", figure_matrixH_cluster_html, ">"), file=opt$html, append=TRUE) | |
571 write(paste0("<img width=700 src=", figure_matrixH_cluster_html, "/></a></td>"), file=opt$html, append=TRUE) | |
572 write("</tr> ", file=opt$html, append=TRUE) | |
573 write("</table>", file=opt$html, append=TRUE) | |
574 write("<br/><br/>", file=opt$html, append=TRUE) | |
575 | |
576 write("<br/><br/><br/><br/>", file=opt$html, append=TRUE) | |
577 } |