Mercurial > repos > iarc > mutspec
comparison R/compareSignature_Galaxy.r @ 0:8c682b3a7c5b draft
Uploaded
author | iarc |
---|---|
date | Tue, 19 Apr 2016 03:07:11 -0400 |
parents | |
children | 916846f73e25 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8c682b3a7c5b |
---|---|
1 #!/usr/bin/Rscript | |
2 | |
3 #-----------------------------------# | |
4 # Author: Maude # | |
5 # Script: compareSignature_Galaxy.r # | |
6 # Last update: 29/10/15 # | |
7 #-----------------------------------# | |
8 | |
9 | |
10 ######################################################################################################################################### | |
11 # Compare new signatures with published one using the cosine similarity method # | |
12 ######################################################################################################################################### | |
13 | |
14 | |
15 #------------------------------------------------------------------------------- | |
16 # Print a usage message if there is no argument pass to the command line | |
17 #------------------------------------------------------------------------------- | |
18 args <- commandArgs(TRUE) | |
19 usage <- function() | |
20 { | |
21 msg <- paste0('Usage:\n', | |
22 ' compareSignature_Galaxy.r Published_Signature New_Signature Output_Folder\n' | |
23 ) | |
24 cat(msg, '\n', file="/dev/stderr") | |
25 quit(status=1) | |
26 } | |
27 | |
28 input = args[length(args)] | |
29 | |
30 if (length(args) == 0) { usage() } | |
31 | |
32 | |
33 #------------------------------------------------------------------------------- | |
34 # Load library | |
35 #------------------------------------------------------------------------------- | |
36 suppressMessages(suppressWarnings(library(lsa))) | |
37 suppressMessages(suppressWarnings(library(ggplot2))) | |
38 suppressMessages(suppressWarnings(library(reshape))) | |
39 | |
40 #------------------------------------------------------------------------------- | |
41 # Recover the arguments | |
42 #------------------------------------------------------------------------------- | |
43 published_signature_file <- args[1] # The matrix with the published signatures | |
44 unknown_signature_file <- args[2] # The matrix W from NMF from which we want to compare the signatures | |
45 dir <- args[3] # html directory | |
46 | |
47 | |
48 #------------------------------------------------------------------------------- | |
49 # Set the variables | |
50 #------------------------------------------------------------------------------- | |
51 # Create the outputs | |
52 output_cosineRes <- paste0(dir, "/Similarity_Matrix.txt") | |
53 output_png <- paste0(dir, "/Similarity_Matrix.png") | |
54 | |
55 | |
56 #------------------------------------------------------------------------------- | |
57 # Calculate the cosine similarity and represent it with a heatmap | |
58 #------------------------------------------------------------------------------- | |
59 # Published signatures | |
60 dataFrame1 <- read.table(published_signature_file, header=T, sep="\t") | |
61 # Remove the first three colmumns (Substitution Type, Trinucleotide Somatic, Mutation Type) | |
62 dataFrame1 <- dataFrame1[,4:ncol(dataFrame1)] | |
63 matrix1 <- as.matrix(dataFrame1) | |
64 | |
65 # Unkown signatures | |
66 dataFrame2 <- read.table(unknown_signature_file, header=T, sep="\t") | |
67 # Remove the first two columns (alteration, context) | |
68 dataFrame2 <- dataFrame2[,3:ncol(dataFrame2)] | |
69 matrix2 <- as.matrix(dataFrame2) | |
70 # Recover the number of new signatures | |
71 NbNewSignature <- ncol(dataFrame2) - 1 | |
72 | |
73 # Combined the two matrices (published and unknown signatures) | |
74 input_matrix_cos <- cbind(matrix1, matrix2) | |
75 # Calculate the cosine similarity | |
76 cosine_res <- cosine(input_matrix_cos) | |
77 | |
78 # Keep only the comparison between the two matrices | |
79 nbSign <- ncol(matrix1)+1 # +1 for havng the first signature of the matrix1 | |
80 cosine_res_subset <- cosine_res[nbSign:nrow(cosine_res), 1:ncol(matrix1)] | |
81 | |
82 # Save the matrix | |
83 write.table(cosine_res_subset, file=output_cosineRes, quote=F, sep="\t", col.names=T, row.names=T) | |
84 | |
85 # Transform the matrix in a suitable format for ggplot2 | |
86 cosineRes_subset_melt <- melt(cosine_res_subset) | |
87 # Rename the columns | |
88 colnames(cosineRes_subset_melt) <- c("Unknown_Signatures", "Published_Signatures", "Similarity") | |
89 # Reorder the Signature for having the same order as in the matrix. Turn your 'signature' column into a character vector | |
90 cosineRes_subset_melt$Published_Signatures <- as.character(cosineRes_subset_melt$Published_Signatures) | |
91 #Then turn it back into an ordered factor | |
92 cosineRes_subset_melt$Published_Signatures <- factor(cosineRes_subset_melt$Published_Signatures, levels=rev(unique(cosineRes_subset_melt$Published_Signature))) | |
93 | |
94 # Base plot: heatmap | |
95 p1 <- ggplot(cosineRes_subset_melt, aes(x=Published_Signatures, y=Unknown_Signatures, fill=Similarity)) + geom_tile(colour="yellow") +scale_fill_gradientn(colours=c("yellow", "red")) + theme_classic() | |
96 | |
97 # Rename the signatures | |
98 if(basename(published_signature_file) == "Frequency-COSMICv72-Hupki.txt") | |
99 { | |
100 p1 <- p1 + scale_x_discrete(breaks = c("Signature.1", "Signature.2", "Signature.3", "Signature.4", "Signature.5", "Signature.6", "Signature.7", "Signature.8", "Signature.9", | |
101 "Signature.10", "Signature.11", "Signature.12", "Signature.13", "Signature.14", "Signature.15", "Signature.16", "Signature.17", | |
102 "Signature.18", "Signature.19", "Signature.20", "Signature.21", "Signature.22", "Signature.23", "Signature.24", "Signature.25", | |
103 "Signature.26", "Signature.27", "Signature.28", "Signature.29", "Signature.30", | |
104 "Signature.1.MEF", "Signature.2.MEF", "Signature.3.MEF", "Signature.5.MEF"), | |
105 labels = c("(Age) Sign 1", "(AID/APOBEC) Sign 2", "(BRCA1/2) Sign 3", "(Smoking) Sign 4", "Sign 5", "(DNA MMR deficiency) Sign 6", "(UV) Sign 7", | |
106 "Sign 8", "(IgG) Sign 9", "(pol e) Sign 10", "(temozolomide) Sign 11", "Sign 12", "(AID/APOBEC) Sign 13", "Sign 14", | |
107 "(DNA MMR deficiency) Sign 15", "Sign 16", "Sign 17", "Sign 18", "Sign 19", "(DNA MMR deficiency) Sign 20", "Sign 21", "(AA) Sign 22", | |
108 "Sign 23", "(Aflatoxin) Sign 24", "Sign 25", "(DNA MMR deficiency) Sign 26", "Sign 27", "Sign 28", "(Tobacco chewing) Sign 29", "Sign 30", | |
109 "(AA) Sign 1 MEF", "(AID) Sign 2 MEF", "(BaP) Sign 3 MEF", "(MNNG) Sign 5 MEF") | |
110 ) | |
111 } | |
112 | |
113 # Flipped cartesian coordinates so that horizontal becomes vertical, and vertical, horizontal | |
114 p1 <- p1 + coord_flip() | |
115 # Remove the x axis line | |
116 p1 <- p1 + theme(axis.line.x=element_blank(), axis.line.y=element_blank()) | |
117 # Add the cosine value only if >= 0.9 | |
118 cosResLabel <- subset(cosineRes_subset_melt, round(cosineRes_subset_melt$Similarity, digits=2) >= 0.9) # Subset the data for keeping only the values greater thant 0.9 | |
119 p1 <- p1 + geom_text(data = cosResLabel, aes(x = Published_Signatures, y = Unknown_Signatures, label = round(cosResLabel$Similarity, 2))) | |
120 | |
121 graphics.off() | |
122 options(bitmapType='cairo') | |
123 png(output_png, width=3000, height=2000, res=300) | |
124 plot(p1) | |
125 invisible( dev.off() ) |