Mercurial > repos > iarc > mutspec
view R/compareSignature_Galaxy.r @ 7:eda59b985b1c draft default tip
Uploaded
author | iarc |
---|---|
date | Mon, 13 Mar 2017 08:21:19 -0400 |
parents | 46a10309dfe2 |
children |
line wrap: on
line source
#!/usr/bin/Rscript #-----------------------------------# # Author: Maude # # Script: compareSignature_Galaxy.r # # Last update: 14/02/17 # #-----------------------------------# ######################################################################################################################################### # Compare new sigantures with published one using the cosine similarity method # ######################################################################################################################################### #------------------------------------------------------------------------------- # Print a usage message if there is no argument pass to the command line #------------------------------------------------------------------------------- args <- commandArgs(TRUE) usage <- function() { msg <- paste0('Usage:\n', ' compareSignature_Galaxy.r Published_Signature New_Signature Output_Folder\n' ) cat(msg, '\n', file="/dev/stderr") quit(status=1) } input = args[length(args)] if (length(args) == 0) { usage() } #------------------------------------------------------------------------------- # Load library #------------------------------------------------------------------------------- suppressMessages(suppressWarnings(library(lsa))) suppressMessages(suppressWarnings(library(ggplot2))) suppressMessages(suppressWarnings(library(reshape))) #------------------------------------------------------------------------------- # Recover the arguments #------------------------------------------------------------------------------- published_signature_file <- args[1] # The matrix with the published signatures unknown_signature_file <- args[2] # The matrix W from NMF from which we want to compare the signatures dir <- args[3] # html directory #------------------------------------------------------------------------------- # Set the variables #------------------------------------------------------------------------------- # Create the outputs output_cosineRes <- paste0(dir, "/Similarity_Matrix.txt") output_png <- paste0(dir, "/Similarity_Matrix.png") #------------------------------------------------------------------------------- # Calculate the cosine similarity and represent it with a heatmap #------------------------------------------------------------------------------- # Published signatures dataFrame1 <- read.table(published_signature_file, header=T, sep="\t") # Remove the first three colmumns (Substitution Type, Trinucleotide Somatic, Mutation Type) dataFrame1 <- dataFrame1[,4:ncol(dataFrame1)] matrix1 <- as.matrix(dataFrame1) # Unkown signatures dataFrame2 <- read.table(unknown_signature_file, header=T, sep="\t") # Remove the first two columns (alteration, context) dataFrame2 <- dataFrame2[,3:ncol(dataFrame2)] matrix2 <- as.matrix(dataFrame2) # Recover the number of new signatures NbNewSignature <- ncol(dataFrame2) - 1 # Combined the two matrices (published and unknown signatures) input_matrix_cos <- cbind(matrix1, matrix2) # Calculate the cosine similarity cosine_res <- cosine(input_matrix_cos) # Keep only the comparison between the two matrices nbSign <- ncol(matrix1)+1 # +1 for havng the first signature of the matrix1 cosine_res_subset <- cosine_res[nbSign:nrow(cosine_res), 1:ncol(matrix1)] # Save the matrix write.table(cosine_res_subset, file=output_cosineRes, quote=F, sep="\t", col.names=T, row.names=T) # Transform the matrix in a suitable format for ggplot2 cosineRes_subset_melt <- melt(cosine_res_subset) # Rename the columns colnames(cosineRes_subset_melt) <- c("Unknown_Signatures", "Published_Signatures", "Similarity") # Reorder the Signature for having the same order as in the matrix. Turn your 'signature' column into a character vector cosineRes_subset_melt$Published_Signatures <- as.character(cosineRes_subset_melt$Published_Signatures) #Then turn it back into an ordered factor cosineRes_subset_melt$Published_Signatures <- factor(cosineRes_subset_melt$Published_Signatures, levels=rev(unique(cosineRes_subset_melt$Published_Signature))) # Base plot: heatmap p1 <- ggplot(cosineRes_subset_melt, aes(x=Published_Signatures, y=Unknown_Signatures, fill=Similarity)) + geom_tile(colour="yellow") +scale_fill_gradientn(colours=c("yellow", "red")) + theme_classic() # Rename the signatures if(basename(published_signature_file) == "Frequency-COSMIC30-Hupki.txt") { p1 <- p1 + scale_x_discrete(breaks = c("Signature.1", "Signature.2", "Signature.3", "Signature.4", "Signature.5", "Signature.6", "Signature.7", "Signature.8", "Signature.9", "Signature.10", "Signature.11", "Signature.12", "Signature.13", "Signature.14", "Signature.15", "Signature.16", "Signature.17", "Signature.18", "Signature.19", "Signature.20", "Signature.21", "Signature.22", "Signature.23", "Signature.24", "Signature.25", "Signature.26", "Signature.27", "Signature.28", "Signature.29", "Signature.30", "Signature.1.MEF", "Signature.2.MEF", "Signature.3.MEF", "Signature.5.MEF"), labels = c("(Age) Sig 1", "(AID/APOBEC) Sig 2", "(BRCA1/2) Sig 3", "(Smoking) Sig 4", "Sig 5", "(DNA MMR deficiency) Sig 6", "(UV) Sig 7", "Sig 8", "(IgG) Sig 9", "(pol e) Sig 10", "(temozolomide) Sig 11", "Sig 12", "(AID/APOBEC) Sig 13", "Sig 14", "(DNA MMR deficiency) Sig 15", "Sig 16", "Sig 17", "Sig 18", "Sig 19", "(DNA MMR deficiency) Sig 20", "Sig 21", "(AA) Sig 22", "Sig 23", "(Aflatoxin) Sig 24", "Sig 25", "(DNA MMR deficiency) Sig 26", "Sig 27", "Sig 28", "(Tobacco chewing) Sig 29", "Sig 30", "(AA) Sig 1 MEF", "(AID) Sig 2 MEF", "(BaP) Sig 3 MEF", "(MNNG) Sig 5 MEF") ) } # Flipped cartesian coordinates so that horizontal becomes vertical, and vertical, horizontal p1 <- p1 + coord_flip() # Remove the x axis line p1 <- p1 + theme(axis.line.x=element_blank(), axis.line.y=element_blank()) # Add the cosine value only if >= 0.9 cosResLabel <- subset(cosineRes_subset_melt, round(cosineRes_subset_melt$Similarity, digits=2) >= 0.9) # Subset the data for keeping only the values greater thant 0.9 p1 <- p1 + geom_text(data = cosResLabel, aes(x = Published_Signatures, y = Unknown_Signatures, label = round(cosResLabel$Similarity, 2))) graphics.off() options(bitmapType='cairo') png(output_png, width=3000, height=2000, res=300) plot(p1) invisible( dev.off() )