0
|
1 #!/usr/bin/Rscript
|
|
2
|
|
3 #-----------------------------------#
|
|
4 # Author: Maude #
|
|
5 # Script: compareSignature_Galaxy.r #
|
|
6 # Last update: 29/10/15 #
|
|
7 #-----------------------------------#
|
|
8
|
|
9
|
|
10 #########################################################################################################################################
|
|
11 # Compare new signatures with published one using the cosine similarity method #
|
|
12 #########################################################################################################################################
|
|
13
|
|
14
|
|
15 #-------------------------------------------------------------------------------
|
|
16 # Print a usage message if there is no argument pass to the command line
|
|
17 #-------------------------------------------------------------------------------
|
|
18 args <- commandArgs(TRUE)
|
|
19 usage <- function()
|
|
20 {
|
|
21 msg <- paste0('Usage:\n',
|
|
22 ' compareSignature_Galaxy.r Published_Signature New_Signature Output_Folder\n'
|
|
23 )
|
|
24 cat(msg, '\n', file="/dev/stderr")
|
|
25 quit(status=1)
|
|
26 }
|
|
27
|
|
28 input = args[length(args)]
|
|
29
|
|
30 if (length(args) == 0) { usage() }
|
|
31
|
|
32
|
|
33 #-------------------------------------------------------------------------------
|
|
34 # Load library
|
|
35 #-------------------------------------------------------------------------------
|
|
36 suppressMessages(suppressWarnings(library(lsa)))
|
|
37 suppressMessages(suppressWarnings(library(ggplot2)))
|
|
38 suppressMessages(suppressWarnings(library(reshape)))
|
|
39
|
|
40 #-------------------------------------------------------------------------------
|
|
41 # Recover the arguments
|
|
42 #-------------------------------------------------------------------------------
|
|
43 published_signature_file <- args[1] # The matrix with the published signatures
|
|
44 unknown_signature_file <- args[2] # The matrix W from NMF from which we want to compare the signatures
|
|
45 dir <- args[3] # html directory
|
|
46
|
|
47
|
|
48 #-------------------------------------------------------------------------------
|
|
49 # Set the variables
|
|
50 #-------------------------------------------------------------------------------
|
|
51 # Create the outputs
|
|
52 output_cosineRes <- paste0(dir, "/Similarity_Matrix.txt")
|
|
53 output_png <- paste0(dir, "/Similarity_Matrix.png")
|
|
54
|
|
55
|
|
56 #-------------------------------------------------------------------------------
|
|
57 # Calculate the cosine similarity and represent it with a heatmap
|
|
58 #-------------------------------------------------------------------------------
|
|
59 # Published signatures
|
|
60 dataFrame1 <- read.table(published_signature_file, header=T, sep="\t")
|
|
61 # Remove the first three colmumns (Substitution Type, Trinucleotide Somatic, Mutation Type)
|
|
62 dataFrame1 <- dataFrame1[,4:ncol(dataFrame1)]
|
|
63 matrix1 <- as.matrix(dataFrame1)
|
|
64
|
|
65 # Unkown signatures
|
|
66 dataFrame2 <- read.table(unknown_signature_file, header=T, sep="\t")
|
|
67 # Remove the first two columns (alteration, context)
|
|
68 dataFrame2 <- dataFrame2[,3:ncol(dataFrame2)]
|
|
69 matrix2 <- as.matrix(dataFrame2)
|
|
70 # Recover the number of new signatures
|
|
71 NbNewSignature <- ncol(dataFrame2) - 1
|
|
72
|
|
73 # Combined the two matrices (published and unknown signatures)
|
|
74 input_matrix_cos <- cbind(matrix1, matrix2)
|
|
75 # Calculate the cosine similarity
|
|
76 cosine_res <- cosine(input_matrix_cos)
|
|
77
|
|
78 # Keep only the comparison between the two matrices
|
|
79 nbSign <- ncol(matrix1)+1 # +1 for havng the first signature of the matrix1
|
|
80 cosine_res_subset <- cosine_res[nbSign:nrow(cosine_res), 1:ncol(matrix1)]
|
|
81
|
|
82 # Save the matrix
|
|
83 write.table(cosine_res_subset, file=output_cosineRes, quote=F, sep="\t", col.names=T, row.names=T)
|
|
84
|
|
85 # Transform the matrix in a suitable format for ggplot2
|
|
86 cosineRes_subset_melt <- melt(cosine_res_subset)
|
|
87 # Rename the columns
|
|
88 colnames(cosineRes_subset_melt) <- c("Unknown_Signatures", "Published_Signatures", "Similarity")
|
|
89 # Reorder the Signature for having the same order as in the matrix. Turn your 'signature' column into a character vector
|
|
90 cosineRes_subset_melt$Published_Signatures <- as.character(cosineRes_subset_melt$Published_Signatures)
|
|
91 #Then turn it back into an ordered factor
|
|
92 cosineRes_subset_melt$Published_Signatures <- factor(cosineRes_subset_melt$Published_Signatures, levels=rev(unique(cosineRes_subset_melt$Published_Signature)))
|
|
93
|
|
94 # Base plot: heatmap
|
|
95 p1 <- ggplot(cosineRes_subset_melt, aes(x=Published_Signatures, y=Unknown_Signatures, fill=Similarity)) + geom_tile(colour="yellow") +scale_fill_gradientn(colours=c("yellow", "red")) + theme_classic()
|
|
96
|
|
97 # Rename the signatures
|
|
98 if(basename(published_signature_file) == "Frequency-COSMICv72-Hupki.txt")
|
|
99 {
|
|
100 p1 <- p1 + scale_x_discrete(breaks = c("Signature.1", "Signature.2", "Signature.3", "Signature.4", "Signature.5", "Signature.6", "Signature.7", "Signature.8", "Signature.9",
|
|
101 "Signature.10", "Signature.11", "Signature.12", "Signature.13", "Signature.14", "Signature.15", "Signature.16", "Signature.17",
|
|
102 "Signature.18", "Signature.19", "Signature.20", "Signature.21", "Signature.22", "Signature.23", "Signature.24", "Signature.25",
|
|
103 "Signature.26", "Signature.27", "Signature.28", "Signature.29", "Signature.30",
|
|
104 "Signature.1.MEF", "Signature.2.MEF", "Signature.3.MEF", "Signature.5.MEF"),
|
|
105 labels = c("(Age) Sign 1", "(AID/APOBEC) Sign 2", "(BRCA1/2) Sign 3", "(Smoking) Sign 4", "Sign 5", "(DNA MMR deficiency) Sign 6", "(UV) Sign 7",
|
|
106 "Sign 8", "(IgG) Sign 9", "(pol e) Sign 10", "(temozolomide) Sign 11", "Sign 12", "(AID/APOBEC) Sign 13", "Sign 14",
|
|
107 "(DNA MMR deficiency) Sign 15", "Sign 16", "Sign 17", "Sign 18", "Sign 19", "(DNA MMR deficiency) Sign 20", "Sign 21", "(AA) Sign 22",
|
|
108 "Sign 23", "(Aflatoxin) Sign 24", "Sign 25", "(DNA MMR deficiency) Sign 26", "Sign 27", "Sign 28", "(Tobacco chewing) Sign 29", "Sign 30",
|
|
109 "(AA) Sign 1 MEF", "(AID) Sign 2 MEF", "(BaP) Sign 3 MEF", "(MNNG) Sign 5 MEF")
|
|
110 )
|
|
111 }
|
|
112
|
|
113 # Flipped cartesian coordinates so that horizontal becomes vertical, and vertical, horizontal
|
|
114 p1 <- p1 + coord_flip()
|
|
115 # Remove the x axis line
|
|
116 p1 <- p1 + theme(axis.line.x=element_blank(), axis.line.y=element_blank())
|
|
117 # Add the cosine value only if >= 0.9
|
|
118 cosResLabel <- subset(cosineRes_subset_melt, round(cosineRes_subset_melt$Similarity, digits=2) >= 0.9) # Subset the data for keeping only the values greater thant 0.9
|
|
119 p1 <- p1 + geom_text(data = cosResLabel, aes(x = Published_Signatures, y = Unknown_Signatures, label = round(cosResLabel$Similarity, 2)))
|
|
120
|
|
121 graphics.off()
|
|
122 options(bitmapType='cairo')
|
|
123 png(output_png, width=3000, height=2000, res=300)
|
|
124 plot(p1)
|
|
125 invisible( dev.off() )
|