Mercurial > repos > vandelj > giant_hierarchical_clustering
comparison src/heatMapClustering.R @ 0:14045c80a222 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author | vandelj |
---|---|
date | Fri, 26 Jun 2020 09:38:23 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:14045c80a222 |
---|---|
1 # A command-line interface to plot heatmap based on expression or diff. exp. analysis | |
2 # written by Jimmy Vandel | |
3 # one of these arguments is required: | |
4 # | |
5 # | |
6 initial.options <- commandArgs(trailingOnly = FALSE) | |
7 file.arg.name <- "--file=" | |
8 script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)]) | |
9 script.basename <- dirname(script.name) | |
10 source(file.path(script.basename, "utils.R")) | |
11 source(file.path(script.basename, "getopt.R")) | |
12 | |
13 #addComment("Welcome R!") | |
14 | |
15 # setup R error handling to go to stderr | |
16 options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
17 | |
18 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
19 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
20 loc <- Sys.setlocale("LC_NUMERIC", "C") | |
21 | |
22 #get starting time | |
23 start.time <- Sys.time() | |
24 | |
25 | |
26 options(stringAsfactors = FALSE, useFancyQuotes = FALSE, OutDec=".") | |
27 | |
28 #get options | |
29 args <- commandArgs() | |
30 | |
31 # get options, using the spec as defined by the enclosed list. | |
32 # we read the options from the default: commandArgs(TRUE). | |
33 spec <- matrix(c( | |
34 "expressionFile", "x", 1, "character", | |
35 "diffAnalyseFile", "x", 1, "character", | |
36 "factorInfo","x", 1, "character", | |
37 "genericData","x", 0, "logical", | |
38 "comparisonName","x",1,"character", | |
39 "comparisonNameLow","x",1,"character", | |
40 "comparisonNameHigh","x",1,"character", | |
41 "filterInputOutput","x", 1, "character", | |
42 "FCthreshold","x", 1, "double", | |
43 "pvalThreshold","x", 1, "double", | |
44 "geneListFiltering","x",1,"character", | |
45 "clusterNumber","x",1,"integer", | |
46 "maxRows","x",1,"integer", | |
47 "sampleClusterNumber","x",1,"integer", | |
48 "dataTransformation","x",1,"character", | |
49 "distanceMeasure","x",1,"character", | |
50 "aggloMethod","x",1,"character", | |
51 "personalColors","x",1,"character", | |
52 "sideBarColorPalette","x",1,"character", | |
53 "format", "x", 1, "character", | |
54 "quiet", "x", 0, "logical", | |
55 "log", "x", 1, "character", | |
56 "outputFile" , "x", 1, "character"), | |
57 byrow=TRUE, ncol=4) | |
58 opt <- getoptLong(spec) | |
59 | |
60 # enforce the following required arguments | |
61 if (is.null(opt$log)) { | |
62 addComment("[ERROR]'log file' is required") | |
63 q( "no", 1, F ) | |
64 } | |
65 addComment("[INFO]Start of R script",T,opt$log,display=FALSE) | |
66 if (is.null(opt$format)) { | |
67 addComment("[ERROR]'output format' is required",T,opt$log) | |
68 q( "no", 1, F ) | |
69 } | |
70 if (is.null(opt$outputFile)) { | |
71 addComment("[ERROR]'output file' is required",T,opt$log) | |
72 q( "no", 1, F ) | |
73 } | |
74 | |
75 if(is.null(opt$expressionFile) && !is.null(opt$genericData)){ | |
76 addComment("[ERROR]generic data clustering is based on expression clustering",T,opt$log) | |
77 q( "no", 1, F ) | |
78 } | |
79 | |
80 if (is.null(opt$clusterNumber) || opt$clusterNumber<2) { | |
81 addComment("[ERROR]valid genes clusters number is required",T,opt$log) | |
82 q( "no", 1, F ) | |
83 } | |
84 | |
85 if (is.null(opt$sampleClusterNumber) || opt$sampleClusterNumber<1) { | |
86 addComment("[ERROR]valid samples clusters number is required",T,opt$log) | |
87 q( "no", 1, F ) | |
88 } | |
89 | |
90 if (is.null(opt$dataTransformation)) { | |
91 addComment("[ERROR]data transformation option is required",T,opt$log) | |
92 q( "no", 1, F ) | |
93 } | |
94 | |
95 if (is.null(opt$distanceMeasure)) { | |
96 addComment("[ERROR]distance measure option is required",T,opt$log) | |
97 q( "no", 1, F ) | |
98 } | |
99 | |
100 if (is.null(opt$aggloMethod)) { | |
101 addComment("[ERROR]agglomeration method option is required",T,opt$log) | |
102 q( "no", 1, F ) | |
103 } | |
104 | |
105 if (is.null(opt$maxRows) || opt$maxRows<2) { | |
106 addComment("[ERROR]valid plotted row number is required",T,opt$log) | |
107 q( "no", 1, F ) | |
108 } | |
109 | |
110 if (!is.null(opt[["comparisonName"]]) && nchar(opt[["comparisonName"]])==0){ | |
111 addComment("[ERROR]you have to specify comparison",T,opt$log) | |
112 q( "no", 1, F ) | |
113 } | |
114 | |
115 if (!is.null(opt$comparisonNameLow) && nchar(opt$comparisonNameLow)==0){ | |
116 addComment("[ERROR]you have to specify comparisonLow",T,opt$log) | |
117 q( "no", 1, F ) | |
118 } | |
119 | |
120 if (!is.null(opt$comparisonNameHigh) && nchar(opt$comparisonNameHigh)==0){ | |
121 addComment("[ERROR]you have to specify comparisonHigh",T,opt$log) | |
122 q( "no", 1, F ) | |
123 } | |
124 | |
125 if (is.null(opt$genericData) && (!is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh))){ | |
126 addComment("[ERROR]comparisonLow and comparisonHigh can be specified only with generic data",T,opt$log) | |
127 q( "no", 1, F ) | |
128 } | |
129 | |
130 if (!is.null(opt$genericData) && !is.null(opt[["comparisonName"]])){ | |
131 addComment("[ERROR]basic comparison cannot be specified for generic data",T,opt$log) | |
132 q( "no", 1, F ) | |
133 } | |
134 | |
135 if ((!is.null(opt[["comparisonName"]]) || !is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh)) && is.null(opt$diffAnalyseFile)) { | |
136 addComment("[ERROR]'diff. exp. analysis file' is required",T,opt$log) | |
137 q( "no", 1, F ) | |
138 } | |
139 | |
140 if (!is.null(opt$genericData) && !is.null(opt$diffAnalyseFile) && is.null(opt$comparisonNameLow) && is.null(opt$comparisonNameHigh)){ | |
141 addComment("[ERROR]Missing comparison information for filtering",T,opt$log) | |
142 q( "no", 1, F ) | |
143 } | |
144 | |
145 if ((!is.null(opt$FCthreshold) || !is.null(opt$pvalThreshold)) && (is.null(opt[["comparisonName"]]) && is.null(opt$comparisonNameLow) && is.null(opt$comparisonNameHigh))) { | |
146 addComment("[ERROR]'comparisons' are missing for filtering",T,opt$log) | |
147 q( "no", 1, F ) | |
148 } | |
149 | |
150 if ((!is.null(opt$FCthreshold) || !is.null(opt$pvalThreshold)) && !is.null(opt$geneListFiltering)) { | |
151 addComment("[ERROR]Cannot have two filtering strategies",T,opt$log) | |
152 q( "no", 1, F ) | |
153 } | |
154 | |
155 verbose <- if (is.null(opt$quiet)) { | |
156 TRUE | |
157 }else{ | |
158 FALSE} | |
159 | |
160 addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE) | |
161 | |
162 addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE) | |
163 addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE) | |
164 | |
165 #directory for plots and HTML | |
166 dir.create(file.path(getwd(), "plotDir")) | |
167 dir.create(file.path(getwd(), "plotLyDir")) | |
168 | |
169 #silent package loading | |
170 suppressPackageStartupMessages({ | |
171 library("plotly") | |
172 library("dendextend") | |
173 #library("ggdendro") | |
174 #library("plyr") | |
175 library("ggplot2") | |
176 library("heatmaply") | |
177 library("circlize") | |
178 #library("RColorBrewer") | |
179 #source("https://bioconductor.org/biocLite.R") | |
180 #biocLite("ComplexHeatmap") | |
181 library("ComplexHeatmap") | |
182 #library("processx") | |
183 }) | |
184 | |
185 expressionToCluster=!is.null(opt$expressionFile) | |
186 | |
187 #load input data files | |
188 if(expressionToCluster){ | |
189 #first expression data | |
190 expressionMatrix=read.csv(file=opt$expressionFile,header=F,sep="\t",colClasses="character") | |
191 #remove first row to convert it as colnames (to avoid X before colnames with header=T) | |
192 colNamesData=expressionMatrix[1,-1] | |
193 expressionMatrix=expressionMatrix[-1,] | |
194 #remove first colum to convert it as rownames | |
195 rowNamesData=expressionMatrix[,1] | |
196 expressionMatrix=expressionMatrix[,-1] | |
197 if(is.data.frame(expressionMatrix)){ | |
198 expressionMatrix=data.matrix(expressionMatrix) | |
199 }else{ | |
200 expressionMatrix=data.matrix(as.numeric(expressionMatrix)) | |
201 } | |
202 dimnames(expressionMatrix)=list(rowNamesData,colNamesData) | |
203 | |
204 #check input files | |
205 if (!is.numeric(expressionMatrix)) { | |
206 addComment("[ERROR]Expression data is not fully numeric!",T,opt$log,display=FALSE) | |
207 q( "no", 1, F ) | |
208 } | |
209 | |
210 addComment("[INFO]Expression data loaded and checked") | |
211 addComment(c("[INFO]Dim of expression matrix:",dim(expressionMatrix)),T,opt$log,display=FALSE) | |
212 } | |
213 | |
214 nbComparisons=0 | |
215 nbColPerContrast=5 | |
216 comparisonMatrix=NULL | |
217 comparisonMatrixInfoGene=NULL | |
218 #if available comparisons | |
219 if(!is.null(opt[["comparisonName"]])){ | |
220 #load results from differential expression analysis | |
221 #consider first row contains column names | |
222 comparisonMatrix=read.csv(file=opt$diffAnalyseFile,header=F,sep="\t") | |
223 colnames(comparisonMatrix)=as.character(unlist(comparisonMatrix[1,])) | |
224 #remove the second line also as it's information line (p-val,FDR.p-val,FC,logFC) | |
225 comparisonMatrix=comparisonMatrix[-c(1,2),] | |
226 #remove first and second colums, convert the first one as rownames | |
227 rownames(comparisonMatrix)=as.character(unlist(comparisonMatrix[,1])) | |
228 #and save second column content that contain geneInfo | |
229 comparisonMatrixInfoGene=as.character(unlist(comparisonMatrix[,2])) | |
230 names(comparisonMatrixInfoGene)=as.character(unlist(comparisonMatrix[,1])) | |
231 comparisonMatrix=comparisonMatrix[,-c(1,2)] | |
232 | |
233 comparisonMatrix=matrix(as.numeric(as.matrix(comparisonMatrix)),ncol=ncol(comparisonMatrix),dimnames = dimnames(comparisonMatrix)) | |
234 | |
235 if (ncol(comparisonMatrix)%%nbColPerContrast != 0) { | |
236 addComment("[ERROR]Diff. exp. data does not contain good number of columns per contrast, should contains in this order:p-val,FDR.p-val,FC,log2(FC) and t-stat",T,opt$log,display=FALSE) | |
237 q( "no", 1, F ) | |
238 } | |
239 | |
240 if(max(comparisonMatrix[,c(seq(1,ncol(comparisonMatrix),nbColPerContrast),seq(2,ncol(comparisonMatrix),nbColPerContrast))])>1 || min(comparisonMatrix[,c(seq(1,ncol(comparisonMatrix),nbColPerContrast),seq(2,ncol(comparisonMatrix),nbColPerContrast))])<0){ | |
241 addComment("[ERROR]Seem that diff. exp. data does not contain correct values for p-val and FDR.p-val columns, should be including in [0,1] interval",T,opt$log,display=FALSE) | |
242 q( "no", 1, F ) | |
243 } | |
244 | |
245 if (!is.numeric(comparisonMatrix)) { | |
246 addComment("[ERROR]Diff. exp. data is not fully numeric!",T,opt$log,display=FALSE) | |
247 q( "no", 1, F ) | |
248 } | |
249 | |
250 if(expressionToCluster && length(setdiff(rownames(comparisonMatrix),rownames(expressionMatrix)))!=0){ | |
251 addComment("[WARNING]All genes from diff. exp. file are not included in expression file",T,opt$log,display=FALSE) | |
252 } | |
253 | |
254 if(expressionToCluster && length(setdiff(rownames(expressionMatrix),rownames(comparisonMatrix)))!=0){ | |
255 addComment("[WARNING]All genes from expression file are not included in diff. exp. file",T,opt$log,display=FALSE) | |
256 } | |
257 | |
258 addComment("[INFO]Diff. exp. analysis loaded and checked",T,opt$log,display=FALSE) | |
259 addComment(c("[INFO]Dim of original comparison matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) | |
260 | |
261 #restrict to user specified comparisons | |
262 restrictedComparisons=unlist(strsplit(opt[["comparisonName"]],",")) | |
263 #should be improved to avoid selection of column names starting too similarly | |
264 colToKeep=which(unlist(lapply(colnames(comparisonMatrix),function(x)any(startsWith(x,restrictedComparisons))))) | |
265 comparisonMatrix=matrix(comparisonMatrix[,colToKeep],ncol=length(colToKeep),dimnames = list(rownames(comparisonMatrix),colnames(comparisonMatrix)[colToKeep])) | |
266 | |
267 #get number of required comparisons | |
268 nbComparisons=ncol(comparisonMatrix)/nbColPerContrast | |
269 | |
270 addComment(c("[INFO]Dim of effective filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) | |
271 } | |
272 | |
273 #should be only the case with generic data | |
274 if(!is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh)){ | |
275 #load generic data used for filtering | |
276 nbColPerContrast=1 | |
277 #consider first row contains column names | |
278 comparisonMatrix=read.csv(file=opt$diffAnalyseFile,header=F,sep="\t") | |
279 colnames(comparisonMatrix)=as.character(unlist(comparisonMatrix[1,])) | |
280 #remove first colum, convert the first one as rownames | |
281 rownames(comparisonMatrix)=as.character(unlist(comparisonMatrix[,1])) | |
282 comparisonMatrix=comparisonMatrix[-1,-1] | |
283 | |
284 comparisonMatrix=matrix(as.numeric(as.matrix(comparisonMatrix)),ncol=ncol(comparisonMatrix),dimnames = dimnames(comparisonMatrix)) | |
285 | |
286 if (!is.numeric(comparisonMatrix)) { | |
287 addComment("[ERROR]Filtering matrix is not fully numeric!",T,opt$log,display=FALSE) | |
288 q( "no", 1, F ) | |
289 } | |
290 | |
291 if(expressionToCluster && length(setdiff(rownames(comparisonMatrix),rownames(expressionMatrix)))!=0){ | |
292 addComment("[WARNING]All genes from filtering file are not included in expression file",T,opt$log,display=FALSE) | |
293 } | |
294 | |
295 if(expressionToCluster && length(setdiff(rownames(expressionMatrix),rownames(comparisonMatrix)))!=0){ | |
296 addComment("[WARNING]All genes from expression file are not included in filtering file",T,opt$log,display=FALSE) | |
297 } | |
298 | |
299 addComment("[INFO]Filtering file loaded and checked",T,opt$log,display=FALSE) | |
300 addComment(c("[INFO]Dim of original filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) | |
301 | |
302 #restrict to user specified comparisons | |
303 restrictedComparisons=c() | |
304 if(!is.null(opt$comparisonNameLow))restrictedComparisons=unique(c(restrictedComparisons,unlist(strsplit(opt$comparisonNameLow,",")))) | |
305 if(!is.null(opt$comparisonNameHigh))restrictedComparisons=unique(c(restrictedComparisons,unlist(strsplit(opt$comparisonNameHigh,",")))) | |
306 | |
307 if (!all(restrictedComparisons%in%colnames(comparisonMatrix))){ | |
308 addComment("[ERROR]Selected columns in filtering file are not present in filtering matrix!",T,opt$log,display=FALSE) | |
309 q( "no", 1, F ) | |
310 } | |
311 comparisonMatrix=matrix(comparisonMatrix[,restrictedComparisons],ncol=length(restrictedComparisons),dimnames = list(rownames(comparisonMatrix),restrictedComparisons)) | |
312 | |
313 #get number of required comparisons | |
314 nbComparisons=ncol(comparisonMatrix) | |
315 | |
316 addComment(c("[INFO]Dim of effective filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) | |
317 } | |
318 | |
319 | |
320 | |
321 factorInfoMatrix=NULL | |
322 if(!is.null(opt$factorInfo)){ | |
323 #get group information | |
324 #load factors file | |
325 factorInfoMatrix=read.csv(file=opt$factorInfo,header=F,sep="\t",colClasses="character") | |
326 #remove first row to convert it as colnames | |
327 colnames(factorInfoMatrix)=factorInfoMatrix[1,] | |
328 factorInfoMatrix=factorInfoMatrix[-1,] | |
329 #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case | |
330 rownames(factorInfoMatrix)=factorInfoMatrix[,1] | |
331 | |
332 factorBarColor=colnames(factorInfoMatrix)[2] | |
333 | |
334 if(ncol(factorInfoMatrix)>2){ | |
335 addComment("[ERROR]Factors file should not contain more than 2 columns",T,opt$log,display=FALSE) | |
336 q( "no", 1, F ) | |
337 } | |
338 | |
339 #factor file is used for color band on heatmap, so all expression matrix column should be in the factor file | |
340 if(expressionToCluster && length(setdiff(colnames(expressionMatrix),rownames(factorInfoMatrix)))!=0){ | |
341 addComment("[ERROR]Missing samples in factor file",T,opt$log,display=FALSE) | |
342 q( "no", 1, F ) | |
343 } | |
344 | |
345 #factor file is used for color band on heatmap, so all comparison matrix column should be in the factor file | |
346 if(!expressionToCluster && length(setdiff(colnames(comparisonMatrix),rownames(factorInfoMatrix)))!=0){ | |
347 addComment("[ERROR]Missing differential contrasts in factor file",T,opt$log,display=FALSE) | |
348 q( "no", 1, F ) | |
349 } | |
350 | |
351 addComment("[INFO]Factors OK",T,opt$log,display=FALSE) | |
352 addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),T,opt$log,display=FALSE) | |
353 } | |
354 | |
355 if(!is.null(opt$personalColors)){ | |
356 ##parse personal colors | |
357 personalColors=unlist(strsplit(opt$personalColors,",")) | |
358 if(length(personalColors)==2){ | |
359 ##add medium color between two to get three colors | |
360 personalColors=c(personalColors[1],paste(c("#",as.character(as.hexmode(floor(apply(col2rgb(personalColors),1,mean))))),collapse=""),personalColors[2]) | |
361 } | |
362 if(length(personalColors)!=3){ | |
363 addComment("[ERROR]Personalized colors doesn't contain enough colors",T,opt$log,display=FALSE) | |
364 q( "no", 1, F ) | |
365 } | |
366 | |
367 } | |
368 | |
369 | |
370 if(!is.null(opt$filterInputOutput) && opt$filterInputOutput=="input"){ | |
371 #filter input data | |
372 | |
373 if(is.null(opt$geneListFiltering)){ | |
374 #filtering using stat thresholds | |
375 #rowToKeep=intersect(which(comparisonMatrix[,seq(2,ncol(comparisonMatrix),4)]<=opt$pvalThreshold),which(abs(comparisonMatrix[,seq(4,ncol(comparisonMatrix),4)])>=log2(opt$FCthreshold))) | |
376 if(is.null(opt$genericData)){ | |
377 #diff. expression matrix | |
378 rowToKeep=names(which(unlist(apply(comparisonMatrix,1,function(x)length(intersect(which(x[seq(2,length(x),nbColPerContrast)]<opt$pvalThreshold),which(abs(x[seq(4,length(x),nbColPerContrast)])>log2(opt$FCthreshold))))!=0)))) | |
379 }else{ | |
380 #generic filtering matrix | |
381 rowToKeep=rownames(comparisonMatrix) | |
382 if(!is.null(opt$comparisonNameLow)){ | |
383 restrictedLowComparisons=unlist(strsplit(opt$comparisonNameLow,",")) | |
384 rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedLowComparisons]>opt$FCthreshold))!=0))))) | |
385 } | |
386 if(!is.null(opt$comparisonNameHigh)){ | |
387 restrictedHighComparisons=unlist(strsplit(opt$comparisonNameHigh,",")) | |
388 rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedHighComparisons]<opt$pvalThreshold))!=0))))) | |
389 } | |
390 } | |
391 }else{ | |
392 #filtering using user gene list | |
393 geneListFiltering=read.csv(opt$geneListFiltering,as.is = 1,header=F) | |
394 rowToKeep=unlist(c(geneListFiltering)) | |
395 } | |
396 | |
397 if(!is.null(comparisonMatrix) && !all(rowToKeep%in%rownames(comparisonMatrix))){ | |
398 #should arrive only with user gene list filtering with diff.exp. results clustering | |
399 addComment("[WARNING] some genes of the user defined list are not in the diff. exp. input file",T,opt$log) | |
400 rowToKeep=intersect(rowToKeep,rownames(comparisonMatrix)) | |
401 } | |
402 | |
403 if(expressionToCluster && !all(rowToKeep%in%rownames(expressionMatrix))){ | |
404 addComment("[WARNING] some genes selected by the input filter are not in the expression file",T,opt$log) | |
405 rowToKeep=intersect(rowToKeep,rownames(expressionMatrix)) | |
406 } | |
407 | |
408 if(length(rowToKeep)==0){ | |
409 addComment("[ERROR]No gene survived to the input filtering thresholds, execution will be aborted. | |
410 Please consider to change threshold values and re-run the tool.",T,opt$log) | |
411 q( "no", 1, F ) | |
412 } | |
413 | |
414 #filter comparison matrix | |
415 if(!is.null(comparisonMatrix)){ | |
416 comparisonMatrix=matrix(comparisonMatrix[rowToKeep,],ncol=ncol(comparisonMatrix),dimnames = list(rowToKeep,colnames(comparisonMatrix))) | |
417 if(!is.null(comparisonMatrixInfoGene))comparisonMatrixInfoGene=comparisonMatrixInfoGene[rowToKeep] | |
418 } | |
419 #then expression matrix | |
420 if(expressionToCluster)expressionMatrix=matrix(expressionMatrix[rowToKeep,],ncol=ncol(expressionMatrix),dimnames = list(rowToKeep,colnames(expressionMatrix))) | |
421 | |
422 if(!is.null(comparisonMatrix) && expressionToCluster && nrow(comparisonMatrix)!=nrow(expressionMatrix)){ | |
423 addComment("[ERROR]Problem during input filtering, please check code",T,opt$log,display=FALSE) | |
424 q( "no", 1, F ) | |
425 } | |
426 | |
427 addComment("[INFO]Filtering step done",T,opt$log,display=FALSE) | |
428 addComment(c("[INFO]Input filtering step:",length(rowToKeep),"remaining rows"),T,opt$log,display=FALSE) | |
429 } | |
430 | |
431 | |
432 addComment("[INFO]Ready to plot",T,opt$log,display=FALSE) | |
433 | |
434 ##--------------------- | |
435 | |
436 #plot heatmap | |
437 if(expressionToCluster){ | |
438 #will make clustering based on expression value or generic value | |
439 dataToHeatMap=expressionMatrix | |
440 valueMeaning="Intensity" | |
441 if(!is.null(opt$genericData))valueMeaning="Value" | |
442 }else{ | |
443 #will make clustering on log2(FC) values | |
444 dataToHeatMap=matrix(comparisonMatrix[,seq(4,ncol(comparisonMatrix),nbColPerContrast)],ncol=nbComparisons,dimnames = list(rownames(comparisonMatrix),colnames(comparisonMatrix)[seq(1,ncol(comparisonMatrix),nbColPerContrast)])) | |
445 valueMeaning="Log2(FC)" | |
446 } | |
447 addComment(c("[INFO]Dim of heatmap matrix:",dim(dataToHeatMap)),T,opt$log,display=FALSE) | |
448 | |
449 if(nrow(dataToHeatMap)==1 && ncol(dataToHeatMap)==1){ | |
450 addComment("[ERROR]Cannot make clustering with unique cell tab",T,opt$log,display=FALSE) | |
451 q( "no", 1, F ) | |
452 } | |
453 | |
454 | |
455 #apply data transformation if needed | |
456 if(opt$dataTransformation=="log"){ | |
457 dataToHeatMap=log(dataToHeatMap) | |
458 valueMeaning=paste(c("log(",valueMeaning,")"),collapse="") | |
459 addComment("[INFO]Data to cluster and to display in the heatmap are log transformed",T,opt$log,display=FALSE) | |
460 } | |
461 if(opt$dataTransformation=="log2"){ | |
462 dataToHeatMap=log2(dataToHeatMap) | |
463 valueMeaning=paste(c("log2(",valueMeaning,")"),collapse="") | |
464 addComment("[INFO]Data to cluster and to display in the heatmap are log2 transformed",T,opt$log,display=FALSE) | |
465 } | |
466 | |
467 maxRowsToDisplay=opt$maxRows | |
468 | |
469 nbClusters=opt$clusterNumber | |
470 if(nbClusters>nrow(dataToHeatMap)){ | |
471 #correct number of clusters if needed | |
472 nbClusters=nrow(dataToHeatMap) | |
473 addComment(c("[WARNING]Not enough rows to reach required clusters number, it is reduced to number of rows:",nbClusters),T,opt$log,display=FALSE) | |
474 } | |
475 | |
476 nbSampleClusters=opt$sampleClusterNumber | |
477 if(nbSampleClusters>ncol(dataToHeatMap)){ | |
478 #correct number of clusters if needed | |
479 nbSampleClusters=ncol(dataToHeatMap) | |
480 addComment(c("[WARNING]Not enough columns to reach required conditions clusters number, it is reduced to number of columns:",nbSampleClusters),T,opt$log,display=FALSE) | |
481 } | |
482 | |
483 colClust=FALSE | |
484 rowClust=FALSE | |
485 effectiveRowClust=FALSE | |
486 | |
487 #make appropriate clustering if needed | |
488 if(nrow(dataToHeatMap)>1 && nbClusters>1)rowClust=hclust(distExtended(dataToHeatMap,method = opt$distanceMeasure),method = opt$aggloMethod) | |
489 if(ncol(dataToHeatMap)>1 && nbSampleClusters>1)colClust=hclust(distExtended(t(dataToHeatMap),method = opt$distanceMeasure),method = opt$aggloMethod) | |
490 | |
491 if(nrow(dataToHeatMap)>maxRowsToDisplay){ | |
492 #make subsampling based on preliminary global clustering | |
493 #clusteringResults=cutree(rowClust,nbClusters) | |
494 #heatMapGenesToKeep=unlist(lapply(seq(1,nbClusters),function(x)sample(which(clusteringResults==x),min(length(which(clusteringResults==x)),round(maxRowsToDisplay/nbClusters))))) | |
495 ##OR | |
496 #basic subsampling | |
497 heatMapGenesToKeep=sample(rownames(dataToHeatMap),maxRowsToDisplay) | |
498 effectiveDataToHeatMap=matrix(dataToHeatMap[heatMapGenesToKeep,],ncol=ncol(dataToHeatMap),dimnames=list(heatMapGenesToKeep,colnames(dataToHeatMap))) | |
499 effectiveNbClusters=min(nbClusters,maxRowsToDisplay) | |
500 if(nrow(effectiveDataToHeatMap)>1 && effectiveNbClusters>1)effectiveRowClust=hclust(distExtended(effectiveDataToHeatMap, method = opt$distanceMeasure),method = opt$aggloMethod) | |
501 addComment(c("[WARNING]Too many rows for efficient heatmap drawing",maxRowsToDisplay,"subsampling is done for vizualization only"),T,opt$log,display=FALSE) | |
502 rm(heatMapGenesToKeep) | |
503 }else{ | |
504 effectiveDataToHeatMap=dataToHeatMap | |
505 effectiveRowClust=rowClust | |
506 effectiveNbClusters=nbClusters | |
507 } | |
508 | |
509 addComment(c("[INFO]Dim of plotted heatmap matrix:",dim(effectiveDataToHeatMap)),T,opt$log,display=FALSE) | |
510 | |
511 personalized_hoverinfo=matrix("",ncol = ncol(effectiveDataToHeatMap),nrow = nrow(effectiveDataToHeatMap),dimnames = dimnames(effectiveDataToHeatMap)) | |
512 if(expressionToCluster){ | |
513 for(iCol in colnames(effectiveDataToHeatMap)){for(iRow in rownames(effectiveDataToHeatMap)){personalized_hoverinfo[iRow,iCol]=paste(c("Probe: ",iRow,"\nCondition: ",iCol,"\n",valueMeaning,": ",effectiveDataToHeatMap[iRow,iCol]),collapse="")}} | |
514 }else{ | |
515 for(iCol in colnames(effectiveDataToHeatMap)){for(iRow in rownames(effectiveDataToHeatMap)){personalized_hoverinfo[iRow,iCol]=paste(c("Probe: ",iRow,"\nCondition: ",iCol,"\nFC: ",round(2^effectiveDataToHeatMap[iRow,iCol],2)),collapse="")}} | |
516 } | |
517 | |
518 #trying to overcome limitation of heatmaply package to modify xtick and ytick label, using directly plotly functions, but for now plotly do not permit to have personalized color for each x/y tick separately | |
519 test=FALSE | |
520 if(test==TRUE){ | |
521 | |
522 #define dendogram shapes | |
523 dd.row <- as.dendrogram(effectiveRowClust) | |
524 dd.col <- as.dendrogram(colClust) | |
525 | |
526 #and color them | |
527 dd.row=color_branches(dd.row, k = effectiveNbClusters, groupLabels = T) | |
528 dd.col=color_branches(dd.col, k = nbSampleClusters, groupLabels = T) | |
529 | |
530 #generating function for dendogram from segment list | |
531 ggdend <- function(df) { | |
532 ggplot() + | |
533 geom_segment(data = df, aes(x=x, y=y, xend=xend, yend=yend)) + | |
534 labs(x = "", y = "") + theme_minimal() + | |
535 theme(axis.text = element_blank(), axis.ticks = element_blank(), | |
536 panel.grid = element_blank()) | |
537 } | |
538 | |
539 # generate x/y dendogram plots | |
540 px <- ggdend(dendro_data(dd.col)$segments) | |
541 py <- ggdend(dendro_data(dd.row)$segments) + coord_flip() | |
542 | |
543 # reshape data matrix | |
544 col.ord <- order.dendrogram(dd.col) | |
545 row.ord <- order.dendrogram(dd.row) | |
546 xx <- effectiveDataToHeatMap[row.ord, col.ord] | |
547 # and also personalized_hoverinfo | |
548 personalized_hoverinfo=personalized_hoverinfo[row.ord, col.ord] | |
549 | |
550 # hide axis ticks and grid lines | |
551 eaxis <- list( | |
552 showticklabels = FALSE, | |
553 showgrid = FALSE, | |
554 zeroline = FALSE | |
555 ) | |
556 | |
557 #make the empty plot | |
558 p_empty <- plot_ly() %>% | |
559 layout(margin = list(l = 200), | |
560 xaxis = eaxis, | |
561 yaxis = eaxis) | |
562 | |
563 heatmap.plotly <- plot_ly( | |
564 z = xx, x = 1:ncol(xx), y = 1:nrow(xx), colors = viridis(n = 101, alpha = 1, begin = 0, end = 1, option = "inferno"), | |
565 type = "heatmap", showlegend = FALSE, text = personalized_hoverinfo, hoverinfo = "text", | |
566 colorbar = list( | |
567 # Capitalise first letter | |
568 title = valueMeaning, | |
569 tickmode = "array", | |
570 len = 0.3 | |
571 ) | |
572 ) %>% | |
573 layout( | |
574 xaxis = list( | |
575 tickfont = list(size = 10,color=get_leaves_branches_col(dd.row)), | |
576 tickangle = 45, | |
577 tickvals = 1:ncol(xx), ticktext = colnames(xx), | |
578 linecolor = "#ffffff", | |
579 range = c(0.5, ncol(xx) + 0.5), | |
580 showticklabels = TRUE | |
581 ), | |
582 yaxis = list( | |
583 tickfont = list(size = 10, color=get_leaves_branches_col(dd.col)), | |
584 tickangle = 0, | |
585 tickvals = 1:nrow(xx), ticktext = rownames(xx), | |
586 linecolor = "#ffffff", | |
587 range = c(0.5, nrow(xx) + 0.5), | |
588 showticklabels = TRUE | |
589 ) | |
590 ) | |
591 | |
592 #generate plotly | |
593 pp <- subplot(px, p_empty, heatmap.plotly, py, nrows = 2, margin = 0,widths = c(0.8,0.2),heights = c(0.2,0.8), shareX = TRUE, | |
594 shareY = TRUE) | |
595 | |
596 #save image file | |
597 export(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) | |
598 #rise a bug due to token stuf | |
599 #orca(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) | |
600 | |
601 | |
602 #save plotLy file | |
603 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/Heatmap.html"),collapse=""),selfcontained = F) | |
604 | |
605 #htmlwidgets::saveWidget(as_widget(pp),"~/Bureau/test.html",selfcontained = F) | |
606 | |
607 }else{ #test | |
608 label_names=c("Probe","Condition",valueMeaning) | |
609 | |
610 # #color hclust objects | |
611 # dd.row=color_branches(effectiveRowClust, k = effectiveNbClusters) | |
612 # #rowColors=get_leaves_branches_col(dd.row) | |
613 # #rowColors[order.dendrogram(dd.row)]=rowColors | |
614 # rowGroup=cutree(effectiveRowClust, k = effectiveNbClusters) | |
615 # | |
616 # #get order of class as they will be displayed on the dendogram | |
617 # rowGroupRenamed=data.frame(cluster=mapvalues(rowGroup, unique(rowGroup[order.dendrogram(dd.row)[nleaves(dd.row):1]]), 1:effectiveNbClusters)) | |
618 # | |
619 # dd.col=color_branches(colClust, k = nbSampleClusters) | |
620 # #colColors=get_leaves_branches_col(dd.col) | |
621 # #colColors[order.dendrogram(dd.col)]=colColors | |
622 # colGroup=cutree(colClust, k = nbSampleClusters) | |
623 # | |
624 # # #get order of class as they will be displayed on the dendogram | |
625 # colGroupRenamed=data.frame(sampleCluster=mapvalues(colGroup, unique(colGroup[order.dendrogram(dd.col)[nleaves(dd.col):1]]), 1:nbSampleClusters)) | |
626 | |
627 | |
628 #while option is not correctly managed by heatmap apply, put personalized_hoverinfo to NULL | |
629 personalized_hoverinfo=NULL | |
630 | |
631 if(is.null(opt$personalColors)){ | |
632 heatmapColors=viridis(n = 101, alpha = 1, begin = 0, end = 1, option = "inferno") | |
633 }else{ | |
634 heatmapColors=personalColors | |
635 } | |
636 | |
637 colGroupRenamed=NULL | |
638 if(!is.null(factorInfoMatrix)){ | |
639 colGroupRenamed=eval(parse(text=(paste("data.frame(",factorBarColor,"=factorInfoMatrix[colnames(effectiveDataToHeatMap),2])",sep="")))) | |
640 sideBarGroupNb=length(table(factorInfoMatrix[colnames(effectiveDataToHeatMap),2])) | |
641 sideBarColorPaletteName="Spectral" | |
642 if(!is.null(opt$sideBarColorPalette) && opt$sideBarColorPalette%in%rownames(RColorBrewer::brewer.pal.info)){ | |
643 sideBarColorPaletteName=opt$sideBarColorPalette | |
644 } | |
645 sideBarColorPalette=setNames(colorRampPalette(RColorBrewer::brewer.pal(RColorBrewer::brewer.pal.info[sideBarColorPaletteName,"maxcolors"], sideBarColorPaletteName))(sideBarGroupNb),unique(factorInfoMatrix[colnames(effectiveDataToHeatMap),2])) | |
646 } | |
647 | |
648 if(!is.null(colGroupRenamed)){ | |
649 pp <- heatmaply(effectiveDataToHeatMap,key.title = valueMeaning,k_row=effectiveNbClusters,k_col=nbSampleClusters,col_side_colors=colGroupRenamed,col_side_palette=sideBarColorPalette,Rowv=effectiveRowClust,Colv=colClust,label_names=label_names,custom_hovertext=personalized_hoverinfo,plot_method = "plotly",colors = heatmapColors) | |
650 }else{ | |
651 pp <- heatmaply(effectiveDataToHeatMap,key.title = valueMeaning,k_row=effectiveNbClusters,k_col=nbSampleClusters,Rowv=effectiveRowClust,Colv=colClust,label_names=label_names,custom_hovertext=personalized_hoverinfo,plot_method = "plotly",colors = heatmapColors) | |
652 } | |
653 | |
654 | |
655 #save image file | |
656 export(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) | |
657 #rise a bug due to token stuf | |
658 #orca(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) | |
659 | |
660 | |
661 #save plotLy file | |
662 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/Heatmap.html"),collapse=""),selfcontained = F) | |
663 | |
664 } | |
665 addComment("[INFO]Heatmap drawn",T,opt$log,display=FALSE) | |
666 | |
667 | |
668 #plot circular heatmap | |
669 if(!class(effectiveRowClust)=="logical"){ | |
670 dendo=as.dendrogram(effectiveRowClust) | |
671 | |
672 if(is.null(opt$personalColors)){ | |
673 col_fun = colorRamp2(quantile(effectiveDataToHeatMap,probs = seq(0,1,0.01)), viridis(101,option = "inferno")) | |
674 }else{ | |
675 col_fun = colorRamp2(quantile(effectiveDataToHeatMap,probs = seq(0,1,0.5)), personalColors) | |
676 } | |
677 | |
678 if(opt$format=="pdf"){ | |
679 pdf(paste(c("./plotDir/circularPlot.pdf"),collapse=""))}else{ | |
680 png(paste(c("./plotDir/circularPlot.png"),collapse="")) | |
681 } | |
682 | |
683 circos.par(cell.padding = c(0, 0, 0, 0), gap.degree = 5) | |
684 circos.initialize(c(rep("a",nrow(effectiveDataToHeatMap)),"b"),xlim=cbind(c(0,0),c(nrow(effectiveDataToHeatMap),5))) | |
685 circos.track(ylim = c(0, 1), bg.border = NA, panel.fun = function(x, y) { | |
686 if(CELL_META$sector.index=="a"){ | |
687 nr = ncol(effectiveDataToHeatMap) | |
688 nc = nrow(effectiveDataToHeatMap) | |
689 circos.text(1:nc- 0.5, rep(0,nc), adj = c(0, 0), | |
690 rownames(effectiveDataToHeatMap)[order.dendrogram(dendo)], facing = "clockwise", niceFacing = TRUE, cex = 0.3) | |
691 } | |
692 }) | |
693 | |
694 circos.track(ylim = c(0, ncol(effectiveDataToHeatMap)), bg.border = NA, panel.fun = function(x, y) { | |
695 | |
696 m = t(matrix(effectiveDataToHeatMap[order.dendrogram(dendo),],ncol=ncol(effectiveDataToHeatMap))) | |
697 col_mat = col_fun(m) | |
698 nr = nrow(m) | |
699 nc = ncol(m) | |
700 if(CELL_META$sector.index=="a"){ | |
701 for(i in 1:nr) { | |
702 circos.rect(1:nc - 1, rep(nr - i, nc), | |
703 1:nc, rep(nr - i + 1, nc), | |
704 border = col_mat[i, ], col = col_mat[i, ]) | |
705 } | |
706 }else{ | |
707 circos.text(rep(1,nr), seq(nr,1,-1) , colnames(effectiveDataToHeatMap),cex = 0.3) | |
708 } | |
709 }) | |
710 | |
711 #dendo = color_branches(dendo, k = effectiveNbClusters, col = colorRampPalette(brewer.pal(12,"Set3"))(effectiveNbClusters)) | |
712 dendo = color_branches(dendo, k = effectiveNbClusters, col = rev(colorspace::rainbow_hcl(effectiveNbClusters))) | |
713 | |
714 | |
715 circos.track(ylim = c(0, attributes(dendo)$height), bg.border = NA, track.height = 0.25, | |
716 panel.fun = function(x, y) { | |
717 if(CELL_META$sector.index=="a")circos.dendrogram(dendo)} ) | |
718 | |
719 circos.clear() | |
720 ##add legend | |
721 lgd_links = Legend(at = seq(ceiling(min(effectiveDataToHeatMap)),floor(max(effectiveDataToHeatMap)),ceiling((floor(max(effectiveDataToHeatMap))-ceiling(min(effectiveDataToHeatMap)))/4)), col_fun = col_fun, | |
722 title_position = "topleft", grid_width = unit(5, "mm") ,title = valueMeaning) | |
723 | |
724 pushViewport(viewport(x = 0.85, y = 0.80, | |
725 width = 0.1, | |
726 height = 0.1, | |
727 just = c("left", "bottom"))) | |
728 grid.draw(lgd_links) | |
729 upViewport() | |
730 | |
731 | |
732 dev.off() | |
733 | |
734 addComment("[INFO]Circular heatmap drawn",T,opt$log,display=FALSE) | |
735 loc <- Sys.setlocale("LC_NUMERIC","C") | |
736 }else{ | |
737 addComment(c("[WARNING]Circular plot will not be plotted considering row or cluster number < 2"),T,opt$log,display=FALSE) | |
738 } | |
739 rm(effectiveDataToHeatMap,effectiveRowClust,effectiveNbClusters) | |
740 | |
741 #plot screeplot | |
742 if(class(rowClust)!="logical" && nrow(dataToHeatMap)>2){ | |
743 screePlotData=c() | |
744 for(iNbClusters in 2:(nbClusters+min(10,max(0,nrow(dataToHeatMap)-nbClusters)))){ | |
745 clusteringResults=cutree(rowClust,iNbClusters) | |
746 #clusteringResults=kmeans(dataToHeatMap,iNbClusters)$cluster | |
747 | |
748 #compute variance between each intra-class points amongst themselves (need at least 3 points by cluster) | |
749 #screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>2){var(dist(dataToHeatMap[temp,]))}else{0}}))) ) | |
750 #compute variance between each intra-class points and fictive mean point (need at least 2 points by cluster) | |
751 #screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){ var(dist(rbind(apply(dataToHeatMap[temp,],2,mean),dataToHeatMap[temp,]))[1:length(temp)]) }else{0}}))) ) | |
752 if(ncol(dataToHeatMap)>1)screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){ sum((distExtended(rbind(apply(dataToHeatMap[temp,],2,mean),dataToHeatMap[temp,]),method = opt$distanceMeasure)[1:length(temp)])^2) }else{0}}))) ) | |
753 else screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){ sum((dataToHeatMap[temp,]-mean(dataToHeatMap[temp,]))^2) }else{0}}))) ) | |
754 } | |
755 | |
756 dataToPlot=data.frame(clusterNb=seq(2,length(screePlotData)+1),wcss=screePlotData) | |
757 p <- ggplot(data=dataToPlot, aes(clusterNb,wcss)) + geom_point(colour="#EE4444") + geom_line(colour="#DD9999") + | |
758 ggtitle("Scree plot") + theme_bw() + xlab(label="Cluster number") + ylab(label="Within cluster sum of squares") + | |
759 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position = "none") + | |
760 scale_x_continuous(breaks=seq(min(dataToPlot$clusterNb), max(dataToPlot$clusterNb), 1)) | |
761 | |
762 #save plotly files | |
763 pp <- ggplotly(p) | |
764 | |
765 if(opt$format=="pdf"){ | |
766 pdf(paste(c("./plotDir/screePlot.pdf"),collapse=""))}else{ | |
767 png(paste(c("./plotDir/screePlot.png"),collapse="")) | |
768 } | |
769 plot(p) | |
770 dev.off() | |
771 | |
772 #save plotly files | |
773 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/screePlot.html"),collapse=""),selfcontained = F) | |
774 | |
775 addComment("[INFO]Scree plot drawn",T,opt$log,display=FALSE) | |
776 }else{ | |
777 addComment(c("[WARNING]Scree plot will not be plotted considering row number <= 2"),T,opt$log,display=FALSE) | |
778 } | |
779 | |
780 ##---------------------- | |
781 | |
782 #filter output based on parameters | |
783 | |
784 rowToKeep=rownames(dataToHeatMap) | |
785 if(!is.null(opt$filterInputOutput) && opt$filterInputOutput=="output"){ | |
786 #rowToKeep=intersect(which(comparisonMatrix[,seq(2,ncol(comparisonMatrix),4)]<=opt$pvalThreshold),which(abs(comparisonMatrix[,seq(4,ncol(comparisonMatrix),4)])>=log2(opt$FCthreshold))) | |
787 if(is.null(opt$geneListFiltering)){ | |
788 if(is.null(opt$genericData)){ | |
789 #diff. expression matrix | |
790 rowToKeep=names(which(unlist(apply(comparisonMatrix,1,function(x)length(intersect(which(x[seq(2,length(x),nbColPerContrast)]<=opt$pvalThreshold),which(abs(x[seq(4,length(x),nbColPerContrast)])>=log2(opt$FCthreshold))))!=0)))) | |
791 }else{ | |
792 #generic filtering matrix | |
793 rowToKeep=rownames(comparisonMatrix) | |
794 if(!is.null(opt$comparisonNameLow)){ | |
795 restrictedLowComparisons=unlist(strsplit(opt$comparisonNameLow,",")) | |
796 rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedLowComparisons]>opt$FCthreshold))!=0))))) | |
797 } | |
798 if(!is.null(opt$comparisonNameHigh)){ | |
799 restrictedHighComparisons=unlist(strsplit(opt$comparisonNameHigh,",")) | |
800 rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedHighComparisons]<opt$pvalThreshold))!=0))))) | |
801 } | |
802 } | |
803 }else{ | |
804 geneListFiltering=read.csv(opt$geneListFiltering,as.is = 1,header=F) | |
805 rowToKeep=unlist(c(geneListFiltering)) | |
806 } | |
807 if(!is.null(comparisonMatrix) && !all(rowToKeep%in%rownames(comparisonMatrix))){ | |
808 #should arrive only with user gene list filtering with diff.exp. results clustering | |
809 addComment("[WARNING] some genes of the user defined list are not in the diff. exp. input file",T,opt$log) | |
810 rowToKeep=intersect(rowToKeep,rownames(comparisonMatrix)) | |
811 } | |
812 | |
813 if(expressionToCluster && !all(rowToKeep%in%rownames(expressionMatrix))){ | |
814 addComment("[WARNING] some genes selected by the output filter are not in the expression file",T,opt$log) | |
815 rowToKeep=intersect(rowToKeep,rownames(expressionMatrix)) | |
816 } | |
817 addComment(c("[INFO]Output filtering step:",length(rowToKeep),"remaining rows"),T,opt$log,display=FALSE) | |
818 } | |
819 | |
820 #we add differential analysis info in output if it was directly used for clustering or when it was used for filtering with expression | |
821 | |
822 #in case of expression or generic data clustering without filtering based on external stats | |
823 if(expressionToCluster && is.null(comparisonMatrix)){ | |
824 if(length(rowToKeep)==0){ | |
825 addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE) | |
826 outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE) | |
827 }else{ | |
828 outputData=matrix(0,ncol=2,nrow=length(rowToKeep)+1) | |
829 outputData[1,]=c("Gene","Cluster") | |
830 outputData[2:(length(rowToKeep)+1),1]=rowToKeep | |
831 if(class(rowClust)!="logical" ){ | |
832 outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep] | |
833 }else{ | |
834 outputData[2:(length(rowToKeep)+1),2]=0 | |
835 } | |
836 } | |
837 } | |
838 | |
839 #in case of generic data clustering with filtering based on generic external data | |
840 if(!is.null(opt$genericData) && !is.null(comparisonMatrix)){ | |
841 if(length(rowToKeep)==0){ | |
842 addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE) | |
843 outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE) | |
844 }else{ | |
845 outputData=matrix(0,ncol=2+nbComparisons,nrow=length(rowToKeep)+1) | |
846 outputData[1,]=c("Gene","Cluster",colnames(comparisonMatrix)) | |
847 outputData[2:(length(rowToKeep)+1),1]=rowToKeep | |
848 if(class(rowClust)!="logical" ){ | |
849 outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep] | |
850 }else{ | |
851 outputData[2:(length(rowToKeep)+1),2]=0 | |
852 } | |
853 outputData[2:(length(rowToKeep)+1),3:(ncol(comparisonMatrix)+2)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4) | |
854 } | |
855 } | |
856 | |
857 #in case of expression data clustering with filtering based on diff. exp. results or diff. exp. results clustering | |
858 if(is.null(opt$genericData) && !is.null(comparisonMatrix)){ | |
859 if(length(rowToKeep)==0){ | |
860 addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE) | |
861 outputData=matrix(0,ncol=3,nrow=3) | |
862 outputData[1,]=c("","","Comparison") | |
863 outputData[2,]=c("Gene","Info","Cluster") | |
864 outputData[3,]=c("noGene","noInfo","noClustering") | |
865 }else{ | |
866 outputData=matrix(0,ncol=3+nbComparisons*nbColPerContrast,nrow=length(rowToKeep)+2) | |
867 outputData[1,]=c("","","Comparison",rep(colnames(comparisonMatrix)[seq(1,ncol(comparisonMatrix),nbColPerContrast)],each=nbColPerContrast)) | |
868 outputData[2,]=c("Gene","Info","Cluster",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),nbComparisons)) | |
869 outputData[3:(length(rowToKeep)+2),1]=rowToKeep | |
870 outputData[3:(length(rowToKeep)+2),2]=comparisonMatrixInfoGene[rowToKeep] | |
871 if(class(rowClust)!="logical" ){ | |
872 outputData[3:(length(rowToKeep)+2),3]=cutree(rowClust,nbClusters)[rowToKeep] | |
873 }else{ | |
874 outputData[3:(length(rowToKeep)+2),3]=0 | |
875 } | |
876 outputData[3:(length(rowToKeep)+2),4:(ncol(comparisonMatrix)+3)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4) | |
877 } | |
878 } | |
879 | |
880 addComment("[INFO]Formated output",T,opt$log,display=FALSE) | |
881 write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\t",col.names = F,row.names = F) | |
882 | |
883 ##---------------------- | |
884 | |
885 end.time <- Sys.time() | |
886 addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE) | |
887 | |
888 | |
889 addComment("[INFO]End of R script",T,opt$log,display=FALSE) | |
890 | |
891 printSessionInfo(opt$log) | |
892 | |
893 #sessionInfo() | |
894 | |
895 | |
896 |