comparison R/compareSignature_Galaxy.r @ 0:8c682b3a7c5b draft

Uploaded
author iarc
date Tue, 19 Apr 2016 03:07:11 -0400
parents
children 916846f73e25
comparison
equal deleted inserted replaced
-1:000000000000 0:8c682b3a7c5b
1 #!/usr/bin/Rscript
2
3 #-----------------------------------#
4 # Author: Maude #
5 # Script: compareSignature_Galaxy.r #
6 # Last update: 29/10/15 #
7 #-----------------------------------#
8
9
10 #########################################################################################################################################
11 # Compare new signatures with published one using the cosine similarity method #
12 #########################################################################################################################################
13
14
15 #-------------------------------------------------------------------------------
16 # Print a usage message if there is no argument pass to the command line
17 #-------------------------------------------------------------------------------
18 args <- commandArgs(TRUE)
19 usage <- function()
20 {
21 msg <- paste0('Usage:\n',
22 ' compareSignature_Galaxy.r Published_Signature New_Signature Output_Folder\n'
23 )
24 cat(msg, '\n', file="/dev/stderr")
25 quit(status=1)
26 }
27
28 input = args[length(args)]
29
30 if (length(args) == 0) { usage() }
31
32
33 #-------------------------------------------------------------------------------
34 # Load library
35 #-------------------------------------------------------------------------------
36 suppressMessages(suppressWarnings(library(lsa)))
37 suppressMessages(suppressWarnings(library(ggplot2)))
38 suppressMessages(suppressWarnings(library(reshape)))
39
40 #-------------------------------------------------------------------------------
41 # Recover the arguments
42 #-------------------------------------------------------------------------------
43 published_signature_file <- args[1] # The matrix with the published signatures
44 unknown_signature_file <- args[2] # The matrix W from NMF from which we want to compare the signatures
45 dir <- args[3] # html directory
46
47
48 #-------------------------------------------------------------------------------
49 # Set the variables
50 #-------------------------------------------------------------------------------
51 # Create the outputs
52 output_cosineRes <- paste0(dir, "/Similarity_Matrix.txt")
53 output_png <- paste0(dir, "/Similarity_Matrix.png")
54
55
56 #-------------------------------------------------------------------------------
57 # Calculate the cosine similarity and represent it with a heatmap
58 #-------------------------------------------------------------------------------
59 # Published signatures
60 dataFrame1 <- read.table(published_signature_file, header=T, sep="\t")
61 # Remove the first three colmumns (Substitution Type, Trinucleotide Somatic, Mutation Type)
62 dataFrame1 <- dataFrame1[,4:ncol(dataFrame1)]
63 matrix1 <- as.matrix(dataFrame1)
64
65 # Unkown signatures
66 dataFrame2 <- read.table(unknown_signature_file, header=T, sep="\t")
67 # Remove the first two columns (alteration, context)
68 dataFrame2 <- dataFrame2[,3:ncol(dataFrame2)]
69 matrix2 <- as.matrix(dataFrame2)
70 # Recover the number of new signatures
71 NbNewSignature <- ncol(dataFrame2) - 1
72
73 # Combined the two matrices (published and unknown signatures)
74 input_matrix_cos <- cbind(matrix1, matrix2)
75 # Calculate the cosine similarity
76 cosine_res <- cosine(input_matrix_cos)
77
78 # Keep only the comparison between the two matrices
79 nbSign <- ncol(matrix1)+1 # +1 for havng the first signature of the matrix1
80 cosine_res_subset <- cosine_res[nbSign:nrow(cosine_res), 1:ncol(matrix1)]
81
82 # Save the matrix
83 write.table(cosine_res_subset, file=output_cosineRes, quote=F, sep="\t", col.names=T, row.names=T)
84
85 # Transform the matrix in a suitable format for ggplot2
86 cosineRes_subset_melt <- melt(cosine_res_subset)
87 # Rename the columns
88 colnames(cosineRes_subset_melt) <- c("Unknown_Signatures", "Published_Signatures", "Similarity")
89 # Reorder the Signature for having the same order as in the matrix. Turn your 'signature' column into a character vector
90 cosineRes_subset_melt$Published_Signatures <- as.character(cosineRes_subset_melt$Published_Signatures)
91 #Then turn it back into an ordered factor
92 cosineRes_subset_melt$Published_Signatures <- factor(cosineRes_subset_melt$Published_Signatures, levels=rev(unique(cosineRes_subset_melt$Published_Signature)))
93
94 # Base plot: heatmap
95 p1 <- ggplot(cosineRes_subset_melt, aes(x=Published_Signatures, y=Unknown_Signatures, fill=Similarity)) + geom_tile(colour="yellow") +scale_fill_gradientn(colours=c("yellow", "red")) + theme_classic()
96
97 # Rename the signatures
98 if(basename(published_signature_file) == "Frequency-COSMICv72-Hupki.txt")
99 {
100 p1 <- p1 + scale_x_discrete(breaks = c("Signature.1", "Signature.2", "Signature.3", "Signature.4", "Signature.5", "Signature.6", "Signature.7", "Signature.8", "Signature.9",
101 "Signature.10", "Signature.11", "Signature.12", "Signature.13", "Signature.14", "Signature.15", "Signature.16", "Signature.17",
102 "Signature.18", "Signature.19", "Signature.20", "Signature.21", "Signature.22", "Signature.23", "Signature.24", "Signature.25",
103 "Signature.26", "Signature.27", "Signature.28", "Signature.29", "Signature.30",
104 "Signature.1.MEF", "Signature.2.MEF", "Signature.3.MEF", "Signature.5.MEF"),
105 labels = c("(Age) Sign 1", "(AID/APOBEC) Sign 2", "(BRCA1/2) Sign 3", "(Smoking) Sign 4", "Sign 5", "(DNA MMR deficiency) Sign 6", "(UV) Sign 7",
106 "Sign 8", "(IgG) Sign 9", "(pol e) Sign 10", "(temozolomide) Sign 11", "Sign 12", "(AID/APOBEC) Sign 13", "Sign 14",
107 "(DNA MMR deficiency) Sign 15", "Sign 16", "Sign 17", "Sign 18", "Sign 19", "(DNA MMR deficiency) Sign 20", "Sign 21", "(AA) Sign 22",
108 "Sign 23", "(Aflatoxin) Sign 24", "Sign 25", "(DNA MMR deficiency) Sign 26", "Sign 27", "Sign 28", "(Tobacco chewing) Sign 29", "Sign 30",
109 "(AA) Sign 1 MEF", "(AID) Sign 2 MEF", "(BaP) Sign 3 MEF", "(MNNG) Sign 5 MEF")
110 )
111 }
112
113 # Flipped cartesian coordinates so that horizontal becomes vertical, and vertical, horizontal
114 p1 <- p1 + coord_flip()
115 # Remove the x axis line
116 p1 <- p1 + theme(axis.line.x=element_blank(), axis.line.y=element_blank())
117 # Add the cosine value only if >= 0.9
118 cosResLabel <- subset(cosineRes_subset_melt, round(cosineRes_subset_melt$Similarity, digits=2) >= 0.9) # Subset the data for keeping only the values greater thant 0.9
119 p1 <- p1 + geom_text(data = cosResLabel, aes(x = Published_Signatures, y = Unknown_Signatures, label = round(cosResLabel$Similarity, 2)))
120
121 graphics.off()
122 options(bitmapType='cairo')
123 png(output_png, width=3000, height=2000, res=300)
124 plot(p1)
125 invisible( dev.off() )