annotate R/somaticSignature_Galaxy.r @ 7:eda59b985b1c draft default tip

Uploaded
author iarc
date Mon, 13 Mar 2017 08:21:19 -0400
parents 46a10309dfe2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
1 #!/usr/bin/Rscript
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
2
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
3 #-----------------------------------#
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
4 # Author: Maude #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
5 # Script: somaticSignature_Galaxy.r #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
6 # Last update: 17/02/17 #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
7 #-----------------------------------#
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
8
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
9
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
10 #########################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
11 # Run NMF algorithm and represent the composition of somatic signatures and the contribution in each samples #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
12 #########################################################################################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
13
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
14 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
15 # Load library for recovering the arguments
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
16 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
17 suppressMessages(suppressWarnings(require("getopt")))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
18
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
19
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
20 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
21 # Recover the arguments
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
22 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
23 spec = matrix(c(
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
24 "input" , "i", 1, "character",
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
25 "nbSignature", "nbSign", 1, "integer",
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
26 "cpu", "cpu", 1, "integer",
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
27 "output", "o", 1, "character",
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
28 "html", "html", 0, "character",
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
29 "help", "h", 0, "logical"
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
30 ),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
31 byrow=TRUE, ncol=4
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
32 )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
33
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
34 opt = getopt(spec);
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
35
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
36 # No argument is pass to the command line
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
37 if(length(opt) == 1)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
38 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
39 cat(paste("Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir> --html <html_for_Galaxy>\n",sep=""))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
40
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
41 cat(paste0("\n--input Input matrix created with the tool MutSpec-Stat\n--nbSignature Number of signatures to extract (min = 2)\n--cpu Number of CPUs\n--output Output directory\n--html Path to HTML page (ONLY FOR GALAXY WRAPPER)\n"))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
42
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
43 q(status=1)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
44 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
45
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
46 # Help was asked for.
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
47 if ( !is.null(opt$help) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
48 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
49 # print a friendly message and exit with a non-zero error code
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
50 cat(paste("Usage:\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir> --html <html_for_Galaxy>\n",sep=""))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
51 q(status=1)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
52 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
53
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
54
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
55
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
56 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
57 # Load library
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
58 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
59 suppressMessages(suppressWarnings(library(NMF)))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
60 suppressMessages(suppressWarnings(library(ggplot2)))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
61 suppressMessages(suppressWarnings(library(reshape)))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
62 suppressMessages(suppressWarnings(library(grid)))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
63 suppressMessages(suppressWarnings(library(scales))) # Set the maximum value to the y axis (graph composition somatic signature)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
64 suppressMessages(suppressWarnings(library(gridExtra))) # function "unit"
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
65
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
66
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
67
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
68 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
69 # Load the functions #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
70 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
71
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
72 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
73 # Set the font depending on X11 availability
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
74 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
75 font <- ""
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
76 # Check the device available
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
77 device <- capabilities()
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
78 # X11 is available
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
79 if(device[5]) { font <- "Helvetica" } else { font <- "Helvetica-Narrow" }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
80
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
81 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
82 # My own theme
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
83 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
84 theme_custom <- function(base_size = 4, base_family = "")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
85 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
86 # Starts with theme_grey and then modify some parts
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
87 theme_grey(base_size = base_size, base_family = base_family) %+replace%
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
88 theme(
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
89 axis.text = element_text(size = rel(0.8), family=font),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
90 axis.ticks = element_line(colour = "black", size=.2),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
91 axis.line = element_line(colour = "black", size = .2),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
92 axis.ticks.length= unit(.05, "cm"),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
93 axis.ticks.margin= unit(.05, "cm"), # space between tick mark and tick label (‘unit’)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
94 legend.key.size = unit(.2, "cm"),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
95 legend.margin = unit(-.5, "cm"),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
96 panel.background = element_blank(),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
97 panel.border = element_blank(),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
98 panel.grid.major = element_blank(),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
99 panel.grid.minor = element_blank(),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
100 strip.text.y = element_text(size = 3)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
101 )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
102 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
103
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
104 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
105 # Customize the theme for adding a y axis
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
106 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
107 mytheme <- theme_custom()
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
108 mytheme$axis.line.x <- mytheme$axis.line.y <- mytheme$axis.line
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
109 mytheme$axis.line.x$colour <- 'white'
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
110
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
111 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
112 # Replace the signature number by alphabet letter
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
113 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
114 ConvertNb2Aphabet <- function(c)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
115 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
116 if(c == "row1" || c == "col1") { c <- "A" } else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
117 if(c == "row2" || c == "col2") { c <- "B"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
118 if(c == "row3" || c == "col3") { c <- "C"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
119 if(c == "row4" || c == "col4") { c <- "D"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
120 if(c == "row5" || c == "col5") { c <- "E"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
121 if(c == "row6" || c == "col6") { c <- "F"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
122 if(c == "row7" || c == "col7") { c <- "G"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
123 if(c == "row8" || c == "col8") { c <- "H"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
124 if(c == "row9" || c == "col9") { c <- "I"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
125 if(c == "row10" || c == "col10") { c <- "J"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
126 if(c == "row11" || c == "col11") { c <- "K"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
127 if(c == "row12" || c == "col12") { c <- "L"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
128 if(c == "row13" || c == "col13") { c <- "M"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
129 if(c == "row14" || c == "col14") { c <- "N"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
130 if(c == "row15" || c == "col15") { c <- "O"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
131 if(c == "row16" || c == "col16") { c <- "P"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
132 if(c == "row17" || c == "col17") { c <- "Q"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
133 if(c == "row18" || c == "col18") { c <- "R"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
134 if(c == "row19" || c == "col19") { c <- "S"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
135 if(c == "row20" || c == "col20") { c <- "T"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
136 if(c == "row21" || c == "col21") { c <- "U"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
137 if(c == "row22" || c == "col22") { c <- "V"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
138 if(c == "row23" || c == "col23") { c <- "W"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
139 if(c == "row24" || c == "col24") { c <- "X"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
140 if(c == "row25" || c == "col25") { c <- "Y"} else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
141 if(c == "row26" || c == "col26") { c <- "Z"} else { c <- c }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
142 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
143
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
144 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
145 # Check the file doesn't have lines equal to zero
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
146 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
147 CheckFile <- function(rowsum, dataFrame, x)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
148 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
149 if(rowsum == 0)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
150 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
151 write("\n\nERROR: There is not enough mutations for running NMF!!!", stderr())
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
152 write(paste0("Input matrix contains at least one null row ", rownames(dataFrame)[x], "\n\n"), stderr())
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
153 stop()
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
154 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
155 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
156
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
157 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
158 # Contribution to Signature as the number of SBS per sample
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
159 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
160 Contri2SignSBS <- function(Total_SBS, Percent)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
161 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
162 Total_SBS*Percent/100
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
163 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
164
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
165 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
166 # Combined two plots and share the legend
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
167 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
168 grid_arrange_shared_legend <- function(...)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
169 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
170 plots <- list(...)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
171 g <- ggplotGrob(plots[[1]] + theme(legend.position="bottom"))$grobs
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
172 legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
173 lheight <- sum(legend$height)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
174 grid.arrange(
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
175 do.call(arrangeGrob, lapply(plots, function(x)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
176 x + theme(legend.position="none"))),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
177 legend,
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
178 ncol = 1,
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
179 heights = unit.c(unit(1, "npc") - lheight, lheight))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
180 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
181
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
182 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
183 # Calculate the mean of each signatures in each cluster
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
184 #-------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
185 meanCluster <- function(df)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
186 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
187 max <- opt$nbSignature+1
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
188 sapply(2:max, function(x) { round(mean(as.numeric(as.matrix(df[,x]))), 2) } )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
189 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
190
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
191
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
192
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
193
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
194 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
195 # Check file #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
196 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
197
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
198 # The input musn't contains lines equal to zero !!!
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
199 matrixNMF <- read.table(opt$input, header=T)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
200 # suppresses the return of sapply function
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
201 invisible( sapply(1:nrow(matrixNMF), function(x) { CheckFile(rowSums(matrixNMF)[x], matrixNMF, x) } ) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
202
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
203
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
204
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
205 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
206 # Run NMF #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
207 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
208 # Create outdir
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
209 dir.create(opt$output)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
210 # Create the output directories
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
211 output_NMF <- paste0(opt$output, "/NMF")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
212 dir.create(output_NMF)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
213 output_Figures <- paste0(output_NMF, "/", "Figures")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
214 dir.create(output_Figures)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
215 output_Files <- paste0(output_NMF, "/", "Files")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
216 dir.create(output_Files)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
217
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
218 # Define the output filenames
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
219 output_cluster <- paste0(output_Files, "/", "Cluster_MixtureCoeff.txt")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
220 figure_cluster <- paste0(output_Figures, "/", "Heatmap_MixtureCoeff.png")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
221 output_matrixW <- paste0(output_Files, "/", "MatrixW-Normto100.txt")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
222 output_matrixW_ggplot2 <- paste0(output_Files, "/", "MatrixW-Inputggplot2.txt")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
223 output_matrixH_ggplot2 <- paste0(output_Files, "/", "MatrixH-Inputggplot2.txt")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
224 output_matrixH_cluster <- paste0(output_Files, "/", "Average_ContriByCluster.txt")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
225 figure_matrixW_png <- paste0(output_Figures, "/", "CompositionSomaticMutation.png")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
226 figure_matrixH_png <- paste0(output_Figures, "/", "ContributionMutationSignature.png")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
227 figure_matrixH_cluster <- paste0(output_Figures, "/", "Average_ContriByCluster.png")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
228
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
229
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
230 # Run NMF
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
231 # request a certain number of cores to use .opt="vP4"
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
232 nbCPU <- paste0("vP", opt$cpu)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
233 res <- nmf(matrixNMF, opt$nbSignature, "brunet", nrun=200, .opt=nbCPU)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
234
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
235 # If there is more than 300 samples the creation of the heatmap returns an error
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
236 if(ncol(matrixNMF) <= 300)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
237 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
238 # Save the clustered heatmap generated by NMF
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
239 graphics.off() # close graphics windows
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
240 options(bitmapType='cairo')
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
241 png(figure_cluster)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
242 coefmap(res, Colv="consensus")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
243 dev.off()
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
244 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
245
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
246 # Recover the matrix W and H
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
247 matrixW <- basis(res)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
248 matrixH <- coef(res)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
249
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
250 # Recover the cluster of the samples
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
251 matrix_cluster <- cbind(as.numeric(predict(res, what="samples")), colnames(matrixNMF))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
252 colnames(matrix_cluster) <- c("Cluster", "Samples")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
253
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
254 ## Save the cluster matrix
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
255 write.table(matrix_cluster, file=output_cluster, quote=F, sep="\t", col.names=T, row.names=F)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
256
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
257
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
258
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
259 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
260 # Composition of somatic signatures #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
261 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
262
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
263 # Normalize to 100%
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
264 matrixW_norm <- t((t(matrixW)/colSums(matrixW))*100)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
265 # Add a column name
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
266 colnames(matrixW_norm) <- colnames(matrixW_norm, do.NULL = FALSE, prefix = "col")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
267 # Replace the name of the columns by the signature name
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
268 colnames(matrixW_norm) <- sapply(1:length(colnames(matrixW_norm)), function(x) { ConvertNb2Aphabet(colnames(matrixW_norm)[x]) } )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
269
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
270 # Split the sequence context from the mutation type
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
271 context <- c() # Create an empty vector for the sequence context
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
272 alteration <- c() # Create an empty vector for the mutation type
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
273 for(i in 1:nrow(matrixW_norm))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
274 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
275 temp <- strsplit((strsplit(rownames(matrixW_norm)[i], ""))[[1]], "")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
276
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
277 context[i] <- paste0(temp[1], "_", temp[7])
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
278 alteration[i] <- paste0(temp[3], temp[4], temp[5])
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
279 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
280
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
281 # Melt the matrix using the signatures as variable
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
282 matrixW_melt <- melt(matrixW_norm)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
283
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
284 # Add columns for the mutation type and the sequence context
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
285 matrixW_norm <- cbind(matrixW_norm, alteration, context)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
286 # Reorder (alteration) for having the same order as in the matrice of published signatures
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
287 matrixW_norm <- matrixW_norm[order(matrixW_norm[,"alteration"], matrixW_norm[,"context"]), ]
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
288 # Reorder (columns) for having the same order as in the matrice of published signatures
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
289 matrixW_norm <- cbind(matrixW_norm[,c("alteration", "context")], matrixW_norm[,1:(ncol(matrixW_norm)-2)]) # Put the column alteration and context at the begining
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
290 # Save the matrix
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
291 write.table(matrixW_norm, file=output_matrixW, quote=F, sep="\t", col.names=T, row.names=F)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
292
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
293 # Add columns for the mutation type and the sequence context
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
294 matrixW_melt <- cbind(matrixW_melt, alteration)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
295 matrixW_melt <- cbind(matrixW_melt, context)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
296 # Rename the columns
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
297 colnames(matrixW_melt) <- c("", "Signature", "value", "alteration", "context")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
298
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
299 # Save the input for ggplot2
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
300 input_ggplot2 <- as.matrix(matrixW_melt)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
301 input_ggplot2 <- input_ggplot2[,2:ncol(input_ggplot2)]
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
302 write.table(input_ggplot2, file=output_matrixW_ggplot2, quote=F, sep="\t", col.names=T, row.names=F)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
303
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
304 # Maximum value of the y axis
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
305 max_matrixW <- as.numeric(max(matrixW_melt$value))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
306
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
307
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
308 # Base plot
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
309 p <- ggplot(matrixW_melt, aes(x=context, y=value, fill=alteration)) + geom_bar(stat="identity", width=0.5) + facet_grid(Signature ~ alteration, scales="free_y")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
310 # Color the mutation types
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
311 p <- p + scale_fill_manual(values=c("blue", "black", "red", "#828282", "#00CC33", "pink"))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
312 # Remove the legend
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
313 p <- p + guides(fill=FALSE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
314 # Customized theme (no background, no facet grid and strip, y axis only)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
315 p <- p + mytheme
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
316 # Remove the title of the x facet strip
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
317 p <- p + theme(strip.text.x=element_blank())
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
318 # Remove the x axis ticks and title
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
319 p <- p + theme(axis.title.x=element_blank(), axis.ticks.x = element_blank(), axis.title.y=element_text(size=5))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
320 # Rename the y axis
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
321 p <- p + ylab("% contribution to signatures")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
322 # Set the maximum value of the y axis to the maximum value of the matrix W
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
323 p <- p + scale_y_continuous(limits=c(0,max_matrixW), oob=squish, breaks=c(0,round(max_matrixW)))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
324 # Save some space for adding the sequence context at the bottom
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
325 p <- p + theme(plot.margin=unit(c(.3, 0, 0, 0), "cm"))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
326 p <- p + scale_x_discrete(breaks = c("A_A","A_C","A_G","A_T", "C_A","C_C","C_G","C_T", "G_A","G_C","G_G","G_T", "T_A","T_C","T_G","T_T"),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
327 labels =c('A\nA',"\nC","\nG","\nT", 'C\nA',"\nC","\nG","\nT",
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
328 'G\nA',"\nC","\nG","\nT", 'T\nA',"\nC","\nG","\nT")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
329 )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
330
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
331
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
332 #------------------------------------------------------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
333 # Change the color of the facets for the mutation type
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
334 #------------------------------------------------------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
335 cols <- rep( c("blue", "black", "red", "#828282", "#00CC33", "pink")) # Facet strip colours
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
336
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
337 # Make a grob object
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
338 Pg <- ggplotGrob(p)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
339 # To keep track of strip.background grobs
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
340 idx <- 0
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
341 # Find each strip.background and alter its backround colour
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
342 for( g in 1:length(Pg$grobs) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
343 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
344 if( grepl( "strip.absoluteGrob" , Pg$grobs[[g]]$name ) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
345 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
346 idx <- idx + 1
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
347 sb <- which( grepl( "strip\\.background" , names( Pg$grobs[[g]]$children ) ) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
348 Pg$grobs[[g]]$children[[sb]][]$gp$fill <- cols[idx]
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
349 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
350 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
351
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
352 # Reduce the size of the facet strip
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
353 Pg$heights[[3]] = unit(.05,"cm")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
354
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
355
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
356 #------------------------------------------------------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
357 # Save the graph in a png file
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
358 #------------------------------------------------------------------------------------------------------------------------------
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
359 options(bitmapType='cairo')
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
360 png(figure_matrixW_png, width=1300, heigh=500, res=300, pointsize = 4)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
361 plot(Pg)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
362 ## Add label for the mutation type above the strip facet
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
363 grid.text(0.12, unit(1,"npc") - unit(1.4,"line"), label="C>A")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
364 grid.text(0.27, unit(1,"npc") - unit(1.4,"line"), label="C>G")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
365 grid.text(0.42, unit(1,"npc") - unit(1.4,"line"), label="C>T")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
366 grid.text(0.58, unit(1,"npc") - unit(1.4,"line"), label="T>A")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
367 grid.text(0.74, unit(1,"npc") - unit(1.4,"line"), label="T>C")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
368 grid.text(0.89, unit(1,"npc") - unit(1.4,"line"), label="T>G")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
369 invisible( dev.off() )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
370
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
371
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
372
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
373 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
374 # Contribution of mutational signature in each samples #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
375 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
376
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
377 # Calculate the variability expain by the model (evar)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
378 rss <- rss(res, matrixNMF)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
379 varTot <- sum(matrixNMF^2)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
380 evar <- 1 - rss / varTot
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
381 evar_round <- round(evar, digits=3) * 100
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
382
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
383 if(is.null(opt$html))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
384 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
385 cat("\n", evar_round, "% of the variance is explained with", opt$nbSignature, "signatures\n\n")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
386 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
387
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
388 # Recover the total number of SBS per samples
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
389 NbSBS <- colSums(matrixNMF)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
390 # Normalized matrix H to 100%
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
391 matrixH_norm <- t((t(matrixH)/colSums(matrixH))*100)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
392 # Add a row name
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
393 rownames(matrixH_norm) <- rownames(matrixH_norm, do.NULL = FALSE, prefix = "row")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
394 # Replace the signature number by letter
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
395 rownames(matrixH_norm) <- sapply(1:length(rownames(matrixH_norm)), function(x) { ConvertNb2Aphabet(rownames(matrixH_norm)[x]) } )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
396
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
397 ## Combined the contribution with the total number of SBS
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
398 matrixH_norm_melt <- melt(matrixH_norm)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
399 matrixH_norm_melt <- cbind(matrixH_norm_melt, rep(NbSBS, each = opt$nbSignature))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
400 colnames(matrixH_norm_melt) <- c("Signature", "Sample", "Percent_Contri", "Total_SBS")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
401
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
402 # Calculate the contribution in number of SBS
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
403 matrixH_norm_melt$ContriSBS <- sapply(1:nrow(matrixH_norm_melt), function(x) { Contri2SignSBS(matrixH_norm_melt$Total_SBS[x], matrixH_norm_melt$Percent_Contri[x]) } )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
404 colnames(matrixH_norm_melt) <- c("Signature", "Sample", "Percent_Contri", "Total_SBS", "CountSBS_Contri")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
405
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
406 # Save the matrix
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
407 write.table(matrixH_norm_melt, file=output_matrixH_ggplot2, quote=F, sep="\t", col.names=T, row.names=F)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
408
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
409 # Base plot for the contribution of each samples according the count of mutations
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
410 p2 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -CountSBS_Contri), y=CountSBS_Contri, fill=Signature)) + geom_bar(stat="identity") + theme_classic()
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
411 # Reverse the y axis
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
412 p2 <- p2 + scale_y_reverse()
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
413 # Rename the y and x axis
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
414 p2 <- p2 + ylab("Number of mutations") + xlab("Samples")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
415 # Remove the x axis line
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
416 p2 <- p2 + theme(axis.line.x=element_blank())
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
417 # Add sample names
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
418 if(ncol(matrixNMF) <= 35)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
419 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
420 p2 <- p2 + theme(axis.text.x = element_text(angle=90))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
421 } else
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
422 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
423 p2 <- p2 + theme(axis.text.x = element_blank())
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
424 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
425
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
426 # Base plot for the contribution of each samples in percentages
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
427 p3 <- ggplot(matrixH_norm_melt, aes(x=reorder(Sample, -CountSBS_Contri), y=Percent_Contri, fill=Signature)) + geom_bar(stat="identity") + theme_classic() + theme(axis.text.x = element_blank()) + xlab("") + ylab("% of mutations")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
428 # Remove the x axis line
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
429 p3 <- p3 + theme(axis.line.x=element_blank(), axis.ticks.x=element_blank())
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
430
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
431 # Plot PNG
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
432 png(figure_matrixH_png, width=3000, heigh=2000, res=300)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
433 # Combined the two plots for the contribution of the samples
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
434 suppressWarnings( grid_arrange_shared_legend(p3, p2) )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
435 invisible( dev.off() )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
436
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
437
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
438 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
439 # Average contributions of each signature in each cluster #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
440 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
441
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
442 matrixH_cluster <- cbind(matrix_cluster[,1], t(matrixH_norm))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
443 colnames(matrixH_cluster) <- c("Cluster", colnames(t(matrixH_norm)))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
444
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
445 df <- as.data.frame(matrixH_cluster)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
446
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
447 tmp_mat <- sapply(1:opt$nbSignature, function(x) { meanCluster(df[df[,1] == x,]) } )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
448 # Add a name for the row and the col
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
449 rownames(tmp_mat) <- sapply(1:opt$nbSignature, function(x) { paste0("Sig. ", x) } )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
450 colnames(tmp_mat) <- sapply(1:opt$nbSignature, function(x) { paste0("Cluster ", x) } )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
451 tmp_mat <- t(tmp_mat)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
452 # Recover the number of samples in each cluster
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
453 nbSampleByCluster <- sapply(1:opt$nbSignature, function(x) { as.numeric( strsplit( as.character(dim(df[df[,1] == x,])), " " ) ) } )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
454 # Combined the average contribution and the number of samples
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
455 tmp_mat <- cbind(tmp_mat, nbSampleByCluster[1,])
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
456 # Add a name for the row and the col
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
457 colnames(tmp_mat)[opt$nbSignature+1] <- "Number of samples"
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
458 # Save the matrix
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
459 write.table(tmp_mat, file=output_matrixH_cluster, quote=F, sep="\t", col.names=T, row.names=T)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
460
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
461 ## Create an image of the table with ggplot2
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
462 # Dummy plot
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
463 p4 <- qplot(1:10, 1:10, geom = "blank") +
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
464 theme(panel.grid.major = element_blank(),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
465 panel.grid.minor = element_blank(),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
466 panel.border = element_rect(fill=NA,color="white", size=0.5, linetype="solid"),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
467 axis.line = element_blank(),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
468 axis.ticks = element_blank(),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
469 panel.background = element_rect(fill="white"),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
470 plot.background = element_rect(fill="white"),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
471 legend.position = "none",
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
472 axis.text = element_blank(),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
473 axis.title = element_blank()
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
474 )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
475 # Adding a table
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
476 p4 <- p4 + annotation_custom(grob = tableGrob(tmp_mat),
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
477 xmin = 4, xmax = 7,
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
478 ymin = 0, ymax = 10)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
479
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
480 # Save the table
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
481 png(figure_matrixH_cluster, width=2500, heigh=1000, res=300)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
482 # Combined the two plots for the contribution of the samples
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
483 plot(p4)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
484 invisible( dev.off() )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
485
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
486
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
487 # Delete the empty plot created by the script
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
488 if (file.exists("Rplots.pdf")) invisible( file.remove("Rplots.pdf") )
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
489
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
490
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
491
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
492 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
493 # Create HTML output for Galaxy #
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
494 ###############################################################################
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
495 if(! is.null(opt$html))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
496 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
497 # Galaxy doesn't need the full path to the files so redefine the output filenames
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
498 output_cluster_html <- paste0("NMF/Files/Cluster_MixtureCoeff.txt")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
499 figure_cluster_html <- paste0("NMF/Figures/Heatmap_MixtureCoeff.png")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
500 output_matrixW_html <- paste0("NMF/Files/MatrixW-Normto100.txt")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
501 output_matrixH_ggplot2_html <- paste0("NMF/Files/MatrixH-Inputggplot2.txt")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
502 output_matrixH_cluster_html <- paste0("NMF/Files/Average_ContriByCluster.txt")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
503 figure_matrixW_png_html <- paste0("NMF/Figures/CompositionSomaticMutation.png")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
504 figure_matrixH_png_html <- paste0("NMF/Figures/ContributionMutationSignature.png")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
505 figure_matrixH_cluster_html <- paste0("NMF/Figures/Average_ContriByCluster.png")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
506
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
507
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
508 #### Create an archive with all the results
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
509 setwd(opt$output)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
510 # zip("NMF.tar.gz", "NMF")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
511 system("zip -r NMF.zip NMF")
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
512
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
513 write("<html><body>", file=opt$html)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
514 write("<center> <h2> NMF Mutational signatures analysis </h2> </center>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
515
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
516 write("<br/> Download the results", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
517 write("<br/><a href=NMF.zip>NMF.zip</a><br/>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
518
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
519 #### Heatmap
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
520 write("<table>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
521 write("<tr> <br/> <th><h3>Heatmap of the mixture coefficient matrix</h3></th> </tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
522 write(paste0("<tr> <td> <center> <br/> <a href=", output_cluster_html, ">Cluster_MixtureCoeff.txt</a> </center> </td> </tr>"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
523 write("<tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
524
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
525 if(!file.exists(figure_cluster))
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
526 {
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
527 write("WARNING: NMF package can't plot the heatmap when the samples size is above 300. <br/>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
528 }else{
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
529 write(paste0("<td> <center> <a href=", figure_cluster_html, ">"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
530 write(paste0("<img src=", figure_cluster_html, "/></a> <center> </td>"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
531 }
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
532 write("</tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
533 write("</table>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
534
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
535 ### Signature composition
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
536 write("<br/><br/>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
537 write("<table>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
538 write("<tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
539 write("<th><h3>Signature composition</h3></th>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
540 write("</tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
541 write(paste0("<tr><td>", evar_round, "% of the variance is explained with ", opt$nbSignature, " signatures", "</td></tr>"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
542 write("<tr height=15></tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
543 write(paste0("<tr><td> <center> <a href=", output_matrixW_html ,">Composition somatic mutation (input matrix for the tool MutSpec-Compare)</a><center></td></tr>"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
544 write("<tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
545 write(paste0("<td><a href=", figure_matrixW_png_html, ">"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
546 write(paste0("<img width=1000 src=", figure_matrixW_png_html, "/></a></td>"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
547 write("</tr> ", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
548 write("</table>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
549 write("<br/><br/>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
550
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
551 ### Sample contribution to signatures
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
552 write("<table>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
553 write("<tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
554 write("<th><h3>Sample contribution to signatures</h3></th>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
555 write("</tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
556 write(paste0("<tr><td> <center> <a href=", output_matrixH_ggplot2_html, ">Contribution mutation signature matrix</a></center></td></tr>"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
557 write("<tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
558 write(paste0("<td><a href=", figure_matrixH_png_html, ">"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
559 write(paste0("<img width=700 src=", figure_matrixH_png_html, "/></a></td>"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
560 write("</tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
561 write("</table>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
562 write("<br/><br/>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
563
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
564 ### Average contributions of each signatures in each cluster
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
565 write("<table>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
566 write("<tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
567 write("<th><h3>Average contributions of each signatures in each cluster</h3></th>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
568 write(paste0("<tr><td> <center> <a href=", output_matrixH_cluster_html, ">Average contributions</a></center></td></tr>"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
569 write("<tr>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
570 write(paste0("<td><a href=", figure_matrixH_cluster_html, ">"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
571 write(paste0("<img width=700 src=", figure_matrixH_cluster_html, "/></a></td>"), file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
572 write("</tr> ", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
573 write("</table>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
574 write("<br/><br/>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
575
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
576 write("<br/><br/><br/><br/>", file=opt$html, append=TRUE)
eda59b985b1c Uploaded
iarc
parents: 6
diff changeset
577 }