annotate R/somaticSignature_Galaxy.r @ 6:46a10309dfe2 draft

Uploaded
author iarc
date Tue, 28 Jun 2016 02:59:32 -0400
parents 8c682b3a7c5b
children eda59b985b1c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
1 #!/usr/bin/Rscript
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
2
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
3 #-----------------------------------#
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
4 # Author: Maude #
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
5 # Script: somaticSignature_Galaxy.r #
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
6 # Last update: 29/07/15 #
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
7 #-----------------------------------#
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
8
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
9
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
10 #########################################################################################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
11 # Run NMF algorithm and represent the composition of somatic signatures and the contribution in each samples #
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
12 #########################################################################################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
13
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
14 #-------------------------------------------------------------------------------
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
15 # Load library for recovering the arguments
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
16 #-------------------------------------------------------------------------------
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
17 suppressMessages(suppressWarnings(require("getopt")))
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
18
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
19
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
#-------------------------------------------------------------------------------
# Recover the arguments
#-------------------------------------------------------------------------------
# getopt specification, one row per option:
# long name, short flag, argument mask (0 = no argument, 1 = required argument),
# data type.
spec <- matrix(c(
  "input",       "i",      1, "character",
  "nbSignature", "nbSign", 1, "integer",
  "cpu",         "cpu",    1, "integer",
  "output",      "o",      1, "character",
  "help",        "h",      0, "logical"
),
byrow = TRUE, ncol = 4
)

opt <- getopt(spec)

# Usage text shared by every early exit below
usage_message <- "Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir>\n"

# No argument was passed to the command line (opt then holds a single element),
# or help was asked for: print a friendly message and exit with a non-zero
# error code.
if (length(opt) == 1 || !is.null(opt$help)) {
  cat(usage_message)
  q(status = 1)
}

# --input, --nbSignature and --output are used unconditionally further down the
# script; report a missing one here instead of failing later with an unrelated
# error. --cpu is left optional, as the script never required it.
required_args <- c("input", "nbSignature", "output")
missing_args <- required_args[vapply(required_args, function(arg_name) {
  is.null(opt[[arg_name]])
}, logical(1))]
if (length(missing_args) > 0) {
  write(
    paste0("ERROR: missing required argument(s): ", paste0("--", missing_args, collapse = ", ")),
    stderr()
  )
  cat(usage_message)
  q(status = 1)
}
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
49
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
50
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
51
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
52 #-------------------------------------------------------------------------------
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
53 # Load library
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
54 #-------------------------------------------------------------------------------
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
55 suppressMessages(suppressWarnings(library(NMF)))
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
56 suppressMessages(suppressWarnings(library(ggplot2)))
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
57 suppressMessages(suppressWarnings(library(reshape)))
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
58 suppressMessages(suppressWarnings(library(grid)))
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
59 suppressMessages(suppressWarnings(library(scales))) # Set the maximum value to the y axis (graph composition somatic signature)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
60 suppressMessages(suppressWarnings(library(gridExtra))) # function "unit"
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
61
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
62
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
63
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
64 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
65 # Load the functions #
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
66 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
67
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
#-------------------------------------------------------------------------------
# Set the font depending on X11 availability
#-------------------------------------------------------------------------------
# Check the devices available
device <- capabilities()
# Look the X11 capability up by name rather than by position, so the test does
# not depend on the order of the entries returned by capabilities().
# isTRUE() also treats an undetermined (NA) capability as "not available".
font <- if (isTRUE(device[["X11"]])) "Helvetica" else "Helvetica-Narrow"
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
76
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
#-------------------------------------------------------------------------------
# My own theme
#-------------------------------------------------------------------------------
# Build the ggplot2 theme used by the figures of this script.
#   base_size   : base font size forwarded to theme_grey()
#   base_family : base font family forwarded to theme_grey()
# Returns theme_grey() with the elements listed below replaced.
# Note: the axis text family comes from the script-level variable `font`,
# not from base_family.
theme_custom <- function(base_size = 4, base_family = "") {
  # Thin black line shared by the axis ticks and the axis lines
  thin_black_line <- element_line(colour = "black", size = .2)

  overrides <- theme(
    axis.text         = element_text(size = rel(0.8), family = font),
    axis.ticks        = thin_black_line,
    axis.line         = thin_black_line,
    axis.ticks.length = unit(.05, "cm"),
    axis.ticks.margin = unit(.05, "cm"), # space between tick mark and tick label ('unit')
    legend.key.size   = unit(.2, "cm"),
    legend.margin     = unit(-.5, "cm"),
    panel.background  = element_blank(),
    panel.border      = element_blank(),
    panel.grid.major  = element_blank(),
    panel.grid.minor  = element_blank(),
    strip.text.y      = element_text(size = 3)
  )

  # Start with theme_grey and then replace the elements above
  theme_grey(base_size = base_size, base_family = base_family) %+replace% overrides
}
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
99
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
#-------------------------------------------------------------------------------
# Customize the theme for adding a y axis
#-------------------------------------------------------------------------------
mytheme <- theme_custom()
# Both axes start from the generic axis line definition ...
generic_axis_line <- mytheme$axis.line
mytheme$axis.line.y <- generic_axis_line
mytheme$axis.line.x <- generic_axis_line
# ... then the x axis line is drawn in white, leaving only the y axis visible
mytheme$axis.line.x$colour <- "white"
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
106
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
#-------------------------------------------------------------------------------
# Replace the signature number by alphabet letter
#-------------------------------------------------------------------------------
# Convert labels of the form "rowN" or "colN" (N from 1 to 26) into the N-th
# upper-case letter ("row1"/"col1" -> "A", ..., "row26"/"col26" -> "Z").
#   c : a label, or a vector of labels
# Returns the input with every matching label replaced by its letter; any other
# value (e.g. "col27", a sample name, NA) is returned unchanged.
ConvertNb2Aphabet <- function(c) {
  # Only the exact labels row1..row26 / col1..col26 are converted
  label_pattern <- "^(row|col)([1-9]|1[0-9]|2[0-6])$"
  is_label <- grepl(label_pattern, c)

  # Strip the "row"/"col" prefix and use the remaining number as index in LETTERS
  c[is_label] <- LETTERS[as.integer(sub("^(row|col)", "", c[is_label]))]
  c
}
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
139
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
#-------------------------------------------------------------------------------
# Check the file doesn't have lines equal to zero
#-------------------------------------------------------------------------------
# Abort the script when a row of the input matrix sums to zero.
#   rowsum    : sum of the row being checked
#   dataFrame : the input matrix (only its row names are used, for the message)
#   x         : index of the row being checked
# Returns NULL invisibly when the row sum is not zero; otherwise writes the
# error messages to stderr and stops.
CheckFile <- function(rowsum, dataFrame, x) {
  # Nothing to report for a row whose sum is not zero
  if (rowsum != 0) {
    return(invisible(NULL))
  }

  write("\n\nERROR: There is not enough mutations for running NMF!!!", stderr())
  write(
    paste0("Input matrix contains at least one null row ", rownames(dataFrame)[x], "\n\n"),
    stderr()
  )
  stop()
}
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
152
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
#-------------------------------------------------------------------------------
# Contribution to Signature as the number of SBS per sample
#-------------------------------------------------------------------------------
# Convert a percentage into a number of SBS.
#   Total_SBS : total number of SBS
#   Percent   : percentage (0-100 scale) to apply to Total_SBS
# Returns Total_SBS * Percent / 100 (element-wise for vectors).
Contri2SignSBS <- function(Total_SBS, Percent) {
  weighted_total <- Total_SBS * Percent
  weighted_total / 100
}
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
160
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
#-------------------------------------------------------------------------------
# Combined two plots and share the legend
#-------------------------------------------------------------------------------
# Draw several ggplot objects with one shared legend.
#   ... : the ggplot objects to combine
# The legend is the "guide-box" grob of the first plot, rendered with its
# legend at the bottom. Every plot is then stripped of its own legend, the
# plots are arranged with arrangeGrob(), and the shared legend is placed
# underneath in a one-column layout.
grid_arrange_shared_legend <- function(...) {
  plots <- list(...)

  # Extract the legend grob from the first plot
  first_plot_grobs <- ggplotGrob(plots[[1]] + theme(legend.position = "bottom"))$grobs
  grob_names <- sapply(first_plot_grobs, function(x) x$name)
  legend <- first_plot_grobs[[which(grob_names == "guide-box")]]
  lheight <- sum(legend$height)

  # Same plots without their individual legend
  remove_legend <- function(x) x + theme(legend.position = "none")
  plots_without_legend <- do.call(arrangeGrob, lapply(plots, remove_legend))

  # The legend takes its own height, the plots take the remaining space
  grid.arrange(
    plots_without_legend,
    legend,
    ncol = 1,
    heights = unit.c(unit(1, "npc") - lheight, lheight)
  )
}
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
177
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
#-------------------------------------------------------------------------------
# Calculate the mean of each signatures in each cluster
#-------------------------------------------------------------------------------
# Average every signature column of a table.
#   df          : table whose columns 2 to nbSignature + 1 hold the signature
#                 values (column 1 is skipped)
#   nbSignature : number of signature columns; defaults to the value given on
#                 the command line (opt$nbSignature), evaluated only when the
#                 argument is not supplied
# Returns a numeric vector with one mean per signature, rounded to 2 decimals.
meanCluster <- function(df, nbSignature = opt$nbSignature) {
  # seq_len() yields no column at all when nbSignature is 0
  signature_cols <- seq_len(nbSignature) + 1L
  vapply(
    signature_cols,
    function(x) round(mean(as.numeric(as.matrix(df[, x]))), 2),
    numeric(1)
  )
}
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
186
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
187
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
188
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
189
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
190 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
191 # Check file #
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
192 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
193
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
# The input mustn't contain lines equal to zero!
matrixNMF <- read.table(opt$input, header = TRUE)
if (nrow(matrixNMF) == 0) {
  stop("Input matrix does not contain any row: ", opt$input, call. = FALSE)
}
# The row sums are computed once, then every row is checked in turn
# (CheckFile stops the script on the first row summing to zero)
row_totals <- rowSums(matrixNMF)
for (row_idx in seq_len(nrow(matrixNMF))) {
  CheckFile(row_totals[row_idx], matrixNMF, row_idx)
}
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
198
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
199
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
200
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
201 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
202 # Run NMF #
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
203 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
204
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
# Create the output directories
output_NMF     <- file.path(opt$output, "NMF")
output_Figures <- file.path(output_NMF, "Figures")
output_Files   <- file.path(output_NMF, "Files")
for (out_dir in c(output_NMF, output_Figures, output_Files)) {
  # recursive = TRUE also creates a missing parent directory; an already
  # existing directory is not an error
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
  # Stop here rather than failing later when the first result file is written
  if (!isTRUE(file.info(out_dir)$isdir)) {
    stop("Cannot create the output directory: ", out_dir, call. = FALSE)
  }
}

# Define the output filenames
output_cluster         <- file.path(output_Files, "Cluster_MixtureCoeff.txt")
figure_cluster         <- file.path(output_Figures, "Heatmap_MixtureCoeff.png")
output_matrixW         <- file.path(output_Files, "MatrixW-Normto100.txt")
output_matrixW_ggplot2 <- file.path(output_Files, "MatrixW-Inputggplot2.txt")
output_matrixH_ggplot2 <- file.path(output_Files, "MatrixH-Inputggplot2.txt")
output_matrixH_cluster <- file.path(output_Files, "Average_ContriByCluster.txt")
figure_matrixW_png     <- file.path(output_Figures, "CompositionSomaticMutation.png")
figure_matrixH_png     <- file.path(output_Figures, "ContributionMutationSignature.png")
figure_matrixH_cluster <- file.path(output_Figures, "Average_ContriByCluster.png")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
223
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
224
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
# Run NMF
# The number of cores to use is requested through the .opt string, e.g. "vP4"
nbCPU <- paste0("vP", opt$cpu)
res <- nmf(
  matrixNMF,
  opt$nbSignature,
  "brunet",
  nrun = 200,
  .opt = nbCPU
)

# The creation of the heatmap returns an error with more than 300 samples,
# so it is only drawn up to that size
if (ncol(matrixNMF) <= 300) {
  # Save the clustered heatmap generated by NMF
  graphics.off() # close graphics windows
  options(bitmapType = "cairo")
  png(figure_cluster)
  coefmap(res, Colv = "consensus")
  dev.off()
}
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
240
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
# Recover the matrix W and H
matrixW <- basis(res)
matrixH <- coef(res)

# Recover the cluster of the samples: one row per sample with the cluster
# predicted by NMF and the sample name (column name of the input matrix)
matrix_cluster <- cbind(
  Cluster = as.numeric(predict(res, what = "samples")),
  Samples = colnames(matrixNMF)
)

## Save the cluster matrix
write.table(
  matrix_cluster,
  file = output_cluster,
  quote = FALSE,
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE
)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
251
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
252
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
253
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
254 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
255 # Composition of somatic signatures #
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
256 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
257
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
# Normalize to 100%: every column of W is divided by its sum and multiplied by 100
matrixW_norm <- sweep(matrixW, 2, colSums(matrixW), "/") * 100
# Add a column name ("col1", "col2", ...) when the matrix has none
colnames(matrixW_norm) <- colnames(matrixW_norm, do.NULL = FALSE, prefix = "col")
# Replace the name of the columns by the signature name (A, B, C, ...)
colnames(matrixW_norm) <- vapply(
  colnames(matrixW_norm),
  ConvertNb2Aphabet,
  character(1),
  USE.NAMES = FALSE
)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
264
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
# Split the sequence context from the mutation type.
# The row names are read by character position: characters 1 and 7 are the
# bases of the sequence context, characters 3 to 5 are the mutation type.
mutation_names <- rownames(matrixW_norm)
if (is.null(mutation_names)) {
  stop("The matrix W has no row names: cannot recover the mutation types", call. = FALSE)
}
# Sequence context, e.g. "A_T"
context <- paste0(substr(mutation_names, 1, 1), "_", substr(mutation_names, 7, 7))
# Mutation type (3 characters)
alteration <- substr(mutation_names, 3, 5)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
275
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
# Melt the matrix using the signatures as variable
matrixW_melt <- melt(matrixW_norm)

# Add columns for the mutation type and the sequence context
matrixW_norm <- cbind(matrixW_norm, alteration, context)
# Reorder (alteration) for having the same order as in the matrix of published signatures
row_order <- order(matrixW_norm[, "alteration"], matrixW_norm[, "context"])
matrixW_norm <- matrixW_norm[row_order, , drop = FALSE]
# Reorder (columns) for having the same order as in the matrix of published
# signatures: put the columns alteration and context at the beginning.
# drop = FALSE keeps the signature column (and its name) as a matrix column
# when there is a single signature.
signature_cols <- seq_len(ncol(matrixW_norm) - 2)
matrixW_norm <- cbind(
  matrixW_norm[, c("alteration", "context"), drop = FALSE],
  matrixW_norm[, signature_cols, drop = FALSE]
)
# Save the matrix
write.table(
  matrixW_norm,
  file = output_matrixW,
  quote = FALSE,
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE
)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
287
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
# Add columns for the mutation type and the sequence context
matrixW_melt <- cbind(matrixW_melt, alteration, context)
# Rename the columns (the first one, unnamed, is dropped from the saved table below)
colnames(matrixW_melt) <- c("", "Signature", "value", "alteration", "context")

# Save the input for ggplot2, without the first column
input_ggplot2 <- as.matrix(matrixW_melt)
input_ggplot2 <- input_ggplot2[, -1]
write.table(
  input_ggplot2,
  file = output_matrixW_ggplot2,
  quote = FALSE,
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE
)

# Maximum value of the y axis
max_matrixW <- as.numeric(max(matrixW_melt$value))
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
301
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
302
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
# Composition of the signatures: one bar per sequence context, one facet row
# per signature and one facet column per mutation type
p <- ggplot(matrixW_melt, aes(x = context, y = value, fill = alteration)) +
  # Base plot
  geom_bar(stat = "identity", width = 0.5) +
  facet_grid(Signature ~ alteration, scales = "free_y") +
  # Color the mutation types
  scale_fill_manual(values = c("blue", "black", "red", "#828282", "#00CC33", "pink")) +
  # Remove the legend
  guides(fill = FALSE) +
  # Customized theme (no background, no facet grid and strip, y axis only)
  mytheme +
  # Remove the title of the x facet strip
  theme(strip.text.x = element_blank()) +
  # Remove the x axis ticks and title
  theme(
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 5)
  ) +
  # Rename the y axis
  ylab("% contribution to signatures") +
  # Set the maximum value of the y axis to the maximum value of the matrix W
  scale_y_continuous(
    limits = c(0, max_matrixW),
    oob = squish,
    breaks = c(0, round(max_matrixW))
  ) +
  # Save some space for adding the sequence context at the bottom
  theme(plot.margin = unit(c(.3, 0, 0, 0), "cm")) +
  # The 5' base is only printed on the first label of each group of four
  scale_x_discrete(
    breaks = c(
      "A_A", "A_C", "A_G", "A_T", "C_A", "C_C", "C_G", "C_T",
      "G_A", "G_C", "G_G", "G_T", "T_A", "T_C", "T_G", "T_T"
    ),
    labels = c(
      "A\nA", "\nC", "\nG", "\nT", "C\nA", "\nC", "\nG", "\nT",
      "G\nA", "\nC", "\nG", "\nT", "T\nA", "\nC", "\nG", "\nT"
    )
  )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
325
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
326
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
327 #------------------------------------------------------------------------------------------------------------------------------
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
328 # Change the color of the facets for the mutation type
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
329 #------------------------------------------------------------------------------------------------------------------------------
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
330 cols <- rep( c("blue", "black", "red", "#828282", "#00CC33", "pink")) # Facet strip colours
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
331
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
332 # Make a grob object
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
333 Pg <- ggplotGrob(p)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
334 # To keep track of strip.background grobs
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
335 idx <- 0
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
336 # Find each strip.background and alter its background colour
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
337 for( g in seq_along(Pg$grobs) )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
338 {
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
339 if( grepl( "strip.absoluteGrob" , Pg$grobs[[g]]$name ) )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
340 {
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
341 idx <- idx + 1
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
342 sb <- which( grepl( "strip\\.background" , names( Pg$grobs[[g]]$children ) ) )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
343 Pg$grobs[[g]]$children[[sb]][]$gp$fill <- cols[idx]
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
344 }
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
345 }
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
346
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
347 # Reduce the size of the facet strip
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
348 Pg$heights[[3]] = unit(.05,"cm")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
349
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
350
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
351 #------------------------------------------------------------------------------------------------------------------------------
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
352 # Save the graph in a png file
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
353 #------------------------------------------------------------------------------------------------------------------------------
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
354 options(bitmapType='cairo')
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
355 png(figure_matrixW_png, width=1300, height=500, res=300, pointsize = 4)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
356 plot(Pg)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
357 ## Add label for the mutation type above the strip facet
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
358 grid.text(0.12, unit(1,"npc") - unit(1.4,"line"), label="C>A")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
359 grid.text(0.27, unit(1,"npc") - unit(1.4,"line"), label="C>G")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
360 grid.text(0.42, unit(1,"npc") - unit(1.4,"line"), label="C>T")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
361 grid.text(0.58, unit(1,"npc") - unit(1.4,"line"), label="T>A")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
362 grid.text(0.74, unit(1,"npc") - unit(1.4,"line"), label="T>C")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
363 grid.text(0.89, unit(1,"npc") - unit(1.4,"line"), label="T>G")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
364 invisible( dev.off() )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
365
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
366
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
367
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
368 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
369 # Contribution of mutational signatures in each sample #
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
370 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
371
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
372 # Recover the total number of SBS per sample
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
373 NbSBS <- colSums(matrixNMF)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
374 # Normalize matrix H so that each sample (column) sums to 100%
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
375 matrixH_norm <- t((t(matrixH)/colSums(matrixH))*100)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
376 # Add a row name
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
377 rownames(matrixH_norm) <- rownames(matrixH_norm, do.NULL = FALSE, prefix = "row")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
378 # Replace the signature number by letter
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
379 rownames(matrixH_norm) <- sapply(1:length(rownames(matrixH_norm)), function(x) { ConvertNb2Aphabet(rownames(matrixH_norm)[x]) } )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
380
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
381 ## Combine the contribution with the total number of SBS
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
382 matrixH_norm_melt <- melt(matrixH_norm)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
383 matrixH_norm_melt <- cbind(matrixH_norm_melt, rep(NbSBS, each = opt$nbSignature))
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
384 colnames(matrixH_norm_melt) <- c("Signature", "Sample", "Value", "Total_SBS")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
385
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
386 # Calculate the contribution in number of SBS
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
387 matrixH_norm_melt$ContriSBS <- sapply(1:nrow(matrixH_norm_melt), function(x) { Contri2SignSBS(matrixH_norm_melt$Total_SBS[x], matrixH_norm_melt$Value[x]) } )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
388
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
389
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
390 # Save the matrix
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
391 write.table(matrixH_norm_melt, file=output_matrixH_ggplot2, quote=FALSE, sep="\t", col.names=TRUE, row.names=FALSE)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
392
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
393 # Base plot for the contribution of each sample according to the count of mutations
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
394 p2 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -ContriSBS), y=ContriSBS, fill=Signature)) + geom_bar(stat="identity") + theme_classic()
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
395 # Remove the name of samples
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
396 p2 <- p2 + theme(axis.text.x = element_blank())
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
397 # Reverse the y axis
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
398 p2 <- p2 + scale_y_reverse()
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
399 # Rename the y and x axis
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
400 p2 <- p2 + ylab("Number of mutations") + xlab("Samples")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
401 # Remove the x axis line
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
402 p2 <- p2 + theme(axis.line.x=element_blank())
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
403
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
404 # Base plot for the contribution of each sample in percentages
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
405 p3 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -ContriSBS), y=Value, fill=Signature)) + geom_bar(stat="identity") + theme_classic() + theme(axis.text.x = element_blank()) + xlab("") + ylab("% of mutations")
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
406 # Remove the x axis line
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
407 p3 <- p3 + theme(axis.line.x=element_blank(), axis.ticks.x=element_blank())
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
408
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
409
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
410 # Plot PNG
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
411 png(figure_matrixH_png, width=3000, height=2000, res=300)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
412 # Combine the two plots for the contribution of the samples
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
413 suppressWarnings( grid_arrange_shared_legend(p3, p2) )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
414 invisible( dev.off() )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
415
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
416
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
417 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
418 # Average contributions of each signature in each cluster #
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
419 ###############################################################################
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
420
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
421 matrixH_cluster <- cbind(matrix_cluster[,1], t(matrixH_norm))
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
422 colnames(matrixH_cluster) <- c("Cluster", colnames(t(matrixH_norm)))
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
423
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
424 df <- as.data.frame(matrixH_cluster)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
425
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
426 tmp_mat <- sapply(1:opt$nbSignature, function(x) { meanCluster(df[df[,1] == x,]) } )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
427 # Add a name for the row and the col
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
428 rownames(tmp_mat) <- sapply(1:opt$nbSignature, function(x) { paste0("Sig. ", x) } )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
429 colnames(tmp_mat) <- sapply(1:opt$nbSignature, function(x) { paste0("Cluster ", x) } )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
430 tmp_mat <- t(tmp_mat)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
431 # Recover the number of samples in each cluster
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
432 nbSampleByCluster <- sapply(seq_len(opt$nbSignature), function(x) { dim(df[df[,1] == x,]) } ) # row 1 = number of samples, row 2 = number of columns
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
433 # Combine the average contribution and the number of samples
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
434 tmp_mat <- cbind(tmp_mat, nbSampleByCluster[1,])
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
435 # Name the column that was just appended
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
436 colnames(tmp_mat)[opt$nbSignature+1] <- "Number of samples"
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
437 # Save the matrix
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
438 write.table(tmp_mat, file=output_matrixH_cluster, quote=FALSE, sep="\t", col.names=TRUE, row.names=TRUE)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
439
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
440 ## Create an image of the table with ggplot2
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
441 # Dummy plot
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
442 p4 <- qplot(1:10, 1:10, geom = "blank") +
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
443 theme(panel.grid.major = element_blank(),
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
444 panel.grid.minor = element_blank(),
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
445 panel.border = element_rect(fill=NA,color="white", size=0.5, linetype="solid"),
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
446 axis.line = element_blank(),
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
447 axis.ticks = element_blank(),
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
448 panel.background = element_rect(fill="white"),
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
449 plot.background = element_rect(fill="white"),
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
450 legend.position = "none",
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
451 axis.text = element_blank(),
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
452 axis.title = element_blank()
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
453 )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
454 # Adding a table
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
455 p4 <- p4 + annotation_custom(grob = tableGrob(tmp_mat),
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
456 xmin = 4, xmax = 7,
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
457 ymin = 0, ymax = 10)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
458
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
459 # Save the table
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
460 png(figure_matrixH_cluster, width=2500, height=1000, res=300)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
461 # Draw the plot holding the table
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
462 plot(p4)
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
463 invisible( dev.off() )
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
464
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
465
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
466 # Delete the empty plot created by the script
46a10309dfe2 Uploaded
iarc
parents: 0
diff changeset
467 if (file.exists("Rplots.pdf")) invisible( file.remove("Rplots.pdf") )