Mercurial > repos > vandelj > giant_hierarchical_clustering
comparison src/heatMapClustering.R @ 0:14045c80a222 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
| author | vandelj | 
|---|---|
| date | Fri, 26 Jun 2020 09:38:23 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:14045c80a222 | 
|---|---|
| 1 # A command-line interface to plot heatmap based on expression or diff. exp. analysis | |
| 2 # written by Jimmy Vandel | |
| 3 # one of these arguments is required: | |
| 4 # | |
| 5 # | |
| 6 initial.options <- commandArgs(trailingOnly = FALSE) | |
| 7 file.arg.name <- "--file=" | |
| 8 script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)]) | |
| 9 script.basename <- dirname(script.name) | |
| 10 source(file.path(script.basename, "utils.R")) | |
| 11 source(file.path(script.basename, "getopt.R")) | |
| 12 | |
| 13 #addComment("Welcome R!") | |
| 14 | |
| 15 # setup R error handling to go to stderr | |
| 16 options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
| 17 | |
| 18 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
| 19 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
| 20 loc <- Sys.setlocale("LC_NUMERIC", "C") | |
| 21 | |
| 22 #get starting time | |
| 23 start.time <- Sys.time() | |
| 24 | |
| 25 | |
| 26 options(stringAsfactors = FALSE, useFancyQuotes = FALSE, OutDec=".") | |
| 27 | |
| 28 #get options | |
| 29 args <- commandArgs() | |
| 30 | |
| 31 # get options, using the spec as defined by the enclosed list. | |
| 32 # we read the options from the default: commandArgs(TRUE). | |
| 33 spec <- matrix(c( | |
| 34 "expressionFile", "x", 1, "character", | |
| 35 "diffAnalyseFile", "x", 1, "character", | |
| 36 "factorInfo","x", 1, "character", | |
| 37 "genericData","x", 0, "logical", | |
| 38 "comparisonName","x",1,"character", | |
| 39 "comparisonNameLow","x",1,"character", | |
| 40 "comparisonNameHigh","x",1,"character", | |
| 41 "filterInputOutput","x", 1, "character", | |
| 42 "FCthreshold","x", 1, "double", | |
| 43 "pvalThreshold","x", 1, "double", | |
| 44 "geneListFiltering","x",1,"character", | |
| 45 "clusterNumber","x",1,"integer", | |
| 46 "maxRows","x",1,"integer", | |
| 47 "sampleClusterNumber","x",1,"integer", | |
| 48 "dataTransformation","x",1,"character", | |
| 49 "distanceMeasure","x",1,"character", | |
| 50 "aggloMethod","x",1,"character", | |
| 51 "personalColors","x",1,"character", | |
| 52 "sideBarColorPalette","x",1,"character", | |
| 53 "format", "x", 1, "character", | |
| 54 "quiet", "x", 0, "logical", | |
| 55 "log", "x", 1, "character", | |
| 56 "outputFile" , "x", 1, "character"), | |
| 57 byrow=TRUE, ncol=4) | |
| 58 opt <- getoptLong(spec) | |
| 59 | |
| 60 # enforce the following required arguments | |
| 61 if (is.null(opt$log)) { | |
| 62 addComment("[ERROR]'log file' is required") | |
| 63 q( "no", 1, F ) | |
| 64 } | |
| 65 addComment("[INFO]Start of R script",T,opt$log,display=FALSE) | |
| 66 if (is.null(opt$format)) { | |
| 67 addComment("[ERROR]'output format' is required",T,opt$log) | |
| 68 q( "no", 1, F ) | |
| 69 } | |
| 70 if (is.null(opt$outputFile)) { | |
| 71 addComment("[ERROR]'output file' is required",T,opt$log) | |
| 72 q( "no", 1, F ) | |
| 73 } | |
| 74 | |
| 75 if(is.null(opt$expressionFile) && !is.null(opt$genericData)){ | |
| 76 addComment("[ERROR]generic data clustering is based on expression clustering",T,opt$log) | |
| 77 q( "no", 1, F ) | |
| 78 } | |
| 79 | |
| 80 if (is.null(opt$clusterNumber) || opt$clusterNumber<2) { | |
| 81 addComment("[ERROR]valid genes clusters number is required",T,opt$log) | |
| 82 q( "no", 1, F ) | |
| 83 } | |
| 84 | |
| 85 if (is.null(opt$sampleClusterNumber) || opt$sampleClusterNumber<1) { | |
| 86 addComment("[ERROR]valid samples clusters number is required",T,opt$log) | |
| 87 q( "no", 1, F ) | |
| 88 } | |
| 89 | |
| 90 if (is.null(opt$dataTransformation)) { | |
| 91 addComment("[ERROR]data transformation option is required",T,opt$log) | |
| 92 q( "no", 1, F ) | |
| 93 } | |
| 94 | |
| 95 if (is.null(opt$distanceMeasure)) { | |
| 96 addComment("[ERROR]distance measure option is required",T,opt$log) | |
| 97 q( "no", 1, F ) | |
| 98 } | |
| 99 | |
| 100 if (is.null(opt$aggloMethod)) { | |
| 101 addComment("[ERROR]agglomeration method option is required",T,opt$log) | |
| 102 q( "no", 1, F ) | |
| 103 } | |
| 104 | |
| 105 if (is.null(opt$maxRows) || opt$maxRows<2) { | |
| 106 addComment("[ERROR]valid plotted row number is required",T,opt$log) | |
| 107 q( "no", 1, F ) | |
| 108 } | |
| 109 | |
| 110 if (!is.null(opt[["comparisonName"]]) && nchar(opt[["comparisonName"]])==0){ | |
| 111 addComment("[ERROR]you have to specify comparison",T,opt$log) | |
| 112 q( "no", 1, F ) | |
| 113 } | |
| 114 | |
| 115 if (!is.null(opt$comparisonNameLow) && nchar(opt$comparisonNameLow)==0){ | |
| 116 addComment("[ERROR]you have to specify comparisonLow",T,opt$log) | |
| 117 q( "no", 1, F ) | |
| 118 } | |
| 119 | |
| 120 if (!is.null(opt$comparisonNameHigh) && nchar(opt$comparisonNameHigh)==0){ | |
| 121 addComment("[ERROR]you have to specify comparisonHigh",T,opt$log) | |
| 122 q( "no", 1, F ) | |
| 123 } | |
| 124 | |
| 125 if (is.null(opt$genericData) && (!is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh))){ | |
| 126 addComment("[ERROR]comparisonLow and comparisonHigh can be specified only with generic data",T,opt$log) | |
| 127 q( "no", 1, F ) | |
| 128 } | |
| 129 | |
| 130 if (!is.null(opt$genericData) && !is.null(opt[["comparisonName"]])){ | |
| 131 addComment("[ERROR]basic comparison cannot be specified for generic data",T,opt$log) | |
| 132 q( "no", 1, F ) | |
| 133 } | |
| 134 | |
| 135 if ((!is.null(opt[["comparisonName"]]) || !is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh)) && is.null(opt$diffAnalyseFile)) { | |
| 136 addComment("[ERROR]'diff. exp. analysis file' is required",T,opt$log) | |
| 137 q( "no", 1, F ) | |
| 138 } | |
| 139 | |
| 140 if (!is.null(opt$genericData) && !is.null(opt$diffAnalyseFile) && is.null(opt$comparisonNameLow) && is.null(opt$comparisonNameHigh)){ | |
| 141 addComment("[ERROR]Missing comparison information for filtering",T,opt$log) | |
| 142 q( "no", 1, F ) | |
| 143 } | |
| 144 | |
| 145 if ((!is.null(opt$FCthreshold) || !is.null(opt$pvalThreshold)) && (is.null(opt[["comparisonName"]]) && is.null(opt$comparisonNameLow) && is.null(opt$comparisonNameHigh))) { | |
| 146 addComment("[ERROR]'comparisons' are missing for filtering",T,opt$log) | |
| 147 q( "no", 1, F ) | |
| 148 } | |
| 149 | |
| 150 if ((!is.null(opt$FCthreshold) || !is.null(opt$pvalThreshold)) && !is.null(opt$geneListFiltering)) { | |
| 151 addComment("[ERROR]Cannot have two filtering strategies",T,opt$log) | |
| 152 q( "no", 1, F ) | |
| 153 } | |
| 154 | |
| 155 verbose <- if (is.null(opt$quiet)) { | |
| 156 TRUE | |
| 157 }else{ | |
| 158 FALSE} | |
| 159 | |
| 160 addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE) | |
| 161 | |
| 162 addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE) | |
| 163 addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE) | |
| 164 | |
| 165 #directory for plots and HTML | |
| 166 dir.create(file.path(getwd(), "plotDir")) | |
| 167 dir.create(file.path(getwd(), "plotLyDir")) | |
| 168 | |
| 169 #silent package loading | |
| 170 suppressPackageStartupMessages({ | |
| 171 library("plotly") | |
| 172 library("dendextend") | |
| 173 #library("ggdendro") | |
| 174 #library("plyr") | |
| 175 library("ggplot2") | |
| 176 library("heatmaply") | |
| 177 library("circlize") | |
| 178 #library("RColorBrewer") | |
| 179 #source("https://bioconductor.org/biocLite.R") | |
| 180 #biocLite("ComplexHeatmap") | |
| 181 library("ComplexHeatmap") | |
| 182 #library("processx") | |
| 183 }) | |
| 184 | |
| 185 expressionToCluster=!is.null(opt$expressionFile) | |
| 186 | |
| 187 #load input data files | |
| 188 if(expressionToCluster){ | |
| 189 #first expression data | |
| 190 expressionMatrix=read.csv(file=opt$expressionFile,header=F,sep="\t",colClasses="character") | |
| 191 #remove first row to convert it as colnames (to avoid X before colnames with header=T) | |
| 192 colNamesData=expressionMatrix[1,-1] | |
| 193 expressionMatrix=expressionMatrix[-1,] | |
| 194 #remove first colum to convert it as rownames | |
| 195 rowNamesData=expressionMatrix[,1] | |
| 196 expressionMatrix=expressionMatrix[,-1] | |
| 197 if(is.data.frame(expressionMatrix)){ | |
| 198 expressionMatrix=data.matrix(expressionMatrix) | |
| 199 }else{ | |
| 200 expressionMatrix=data.matrix(as.numeric(expressionMatrix)) | |
| 201 } | |
| 202 dimnames(expressionMatrix)=list(rowNamesData,colNamesData) | |
| 203 | |
| 204 #check input files | |
| 205 if (!is.numeric(expressionMatrix)) { | |
| 206 addComment("[ERROR]Expression data is not fully numeric!",T,opt$log,display=FALSE) | |
| 207 q( "no", 1, F ) | |
| 208 } | |
| 209 | |
| 210 addComment("[INFO]Expression data loaded and checked") | |
| 211 addComment(c("[INFO]Dim of expression matrix:",dim(expressionMatrix)),T,opt$log,display=FALSE) | |
| 212 } | |
| 213 | |
| 214 nbComparisons=0 | |
| 215 nbColPerContrast=5 | |
| 216 comparisonMatrix=NULL | |
| 217 comparisonMatrixInfoGene=NULL | |
| 218 #if available comparisons | |
| 219 if(!is.null(opt[["comparisonName"]])){ | |
| 220 #load results from differential expression analysis | |
| 221 #consider first row contains column names | |
| 222 comparisonMatrix=read.csv(file=opt$diffAnalyseFile,header=F,sep="\t") | |
| 223 colnames(comparisonMatrix)=as.character(unlist(comparisonMatrix[1,])) | |
| 224 #remove the second line also as it's information line (p-val,FDR.p-val,FC,logFC) | |
| 225 comparisonMatrix=comparisonMatrix[-c(1,2),] | |
| 226 #remove first and second colums, convert the first one as rownames | |
| 227 rownames(comparisonMatrix)=as.character(unlist(comparisonMatrix[,1])) | |
| 228 #and save second column content that contain geneInfo | |
| 229 comparisonMatrixInfoGene=as.character(unlist(comparisonMatrix[,2])) | |
| 230 names(comparisonMatrixInfoGene)=as.character(unlist(comparisonMatrix[,1])) | |
| 231 comparisonMatrix=comparisonMatrix[,-c(1,2)] | |
| 232 | |
| 233 comparisonMatrix=matrix(as.numeric(as.matrix(comparisonMatrix)),ncol=ncol(comparisonMatrix),dimnames = dimnames(comparisonMatrix)) | |
| 234 | |
| 235 if (ncol(comparisonMatrix)%%nbColPerContrast != 0) { | |
| 236 addComment("[ERROR]Diff. exp. data does not contain good number of columns per contrast, should contains in this order:p-val,FDR.p-val,FC,log2(FC) and t-stat",T,opt$log,display=FALSE) | |
| 237 q( "no", 1, F ) | |
| 238 } | |
| 239 | |
| 240 if(max(comparisonMatrix[,c(seq(1,ncol(comparisonMatrix),nbColPerContrast),seq(2,ncol(comparisonMatrix),nbColPerContrast))])>1 || min(comparisonMatrix[,c(seq(1,ncol(comparisonMatrix),nbColPerContrast),seq(2,ncol(comparisonMatrix),nbColPerContrast))])<0){ | |
| 241 addComment("[ERROR]Seem that diff. exp. data does not contain correct values for p-val and FDR.p-val columns, should be including in [0,1] interval",T,opt$log,display=FALSE) | |
| 242 q( "no", 1, F ) | |
| 243 } | |
| 244 | |
| 245 if (!is.numeric(comparisonMatrix)) { | |
| 246 addComment("[ERROR]Diff. exp. data is not fully numeric!",T,opt$log,display=FALSE) | |
| 247 q( "no", 1, F ) | |
| 248 } | |
| 249 | |
| 250 if(expressionToCluster && length(setdiff(rownames(comparisonMatrix),rownames(expressionMatrix)))!=0){ | |
| 251 addComment("[WARNING]All genes from diff. exp. file are not included in expression file",T,opt$log,display=FALSE) | |
| 252 } | |
| 253 | |
| 254 if(expressionToCluster && length(setdiff(rownames(expressionMatrix),rownames(comparisonMatrix)))!=0){ | |
| 255 addComment("[WARNING]All genes from expression file are not included in diff. exp. file",T,opt$log,display=FALSE) | |
| 256 } | |
| 257 | |
| 258 addComment("[INFO]Diff. exp. analysis loaded and checked",T,opt$log,display=FALSE) | |
| 259 addComment(c("[INFO]Dim of original comparison matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) | |
| 260 | |
| 261 #restrict to user specified comparisons | |
| 262 restrictedComparisons=unlist(strsplit(opt[["comparisonName"]],",")) | |
| 263 #should be improved to avoid selection of column names starting too similarly | |
| 264 colToKeep=which(unlist(lapply(colnames(comparisonMatrix),function(x)any(startsWith(x,restrictedComparisons))))) | |
| 265 comparisonMatrix=matrix(comparisonMatrix[,colToKeep],ncol=length(colToKeep),dimnames = list(rownames(comparisonMatrix),colnames(comparisonMatrix)[colToKeep])) | |
| 266 | |
| 267 #get number of required comparisons | |
| 268 nbComparisons=ncol(comparisonMatrix)/nbColPerContrast | |
| 269 | |
| 270 addComment(c("[INFO]Dim of effective filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) | |
| 271 } | |
| 272 | |
| 273 #should be only the case with generic data | |
| 274 if(!is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh)){ | |
| 275 #load generic data used for filtering | |
| 276 nbColPerContrast=1 | |
| 277 #consider first row contains column names | |
| 278 comparisonMatrix=read.csv(file=opt$diffAnalyseFile,header=F,sep="\t") | |
| 279 colnames(comparisonMatrix)=as.character(unlist(comparisonMatrix[1,])) | |
| 280 #remove first colum, convert the first one as rownames | |
| 281 rownames(comparisonMatrix)=as.character(unlist(comparisonMatrix[,1])) | |
| 282 comparisonMatrix=comparisonMatrix[-1,-1] | |
| 283 | |
| 284 comparisonMatrix=matrix(as.numeric(as.matrix(comparisonMatrix)),ncol=ncol(comparisonMatrix),dimnames = dimnames(comparisonMatrix)) | |
| 285 | |
| 286 if (!is.numeric(comparisonMatrix)) { | |
| 287 addComment("[ERROR]Filtering matrix is not fully numeric!",T,opt$log,display=FALSE) | |
| 288 q( "no", 1, F ) | |
| 289 } | |
| 290 | |
| 291 if(expressionToCluster && length(setdiff(rownames(comparisonMatrix),rownames(expressionMatrix)))!=0){ | |
| 292 addComment("[WARNING]All genes from filtering file are not included in expression file",T,opt$log,display=FALSE) | |
| 293 } | |
| 294 | |
| 295 if(expressionToCluster && length(setdiff(rownames(expressionMatrix),rownames(comparisonMatrix)))!=0){ | |
| 296 addComment("[WARNING]All genes from expression file are not included in filtering file",T,opt$log,display=FALSE) | |
| 297 } | |
| 298 | |
| 299 addComment("[INFO]Filtering file loaded and checked",T,opt$log,display=FALSE) | |
| 300 addComment(c("[INFO]Dim of original filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) | |
| 301 | |
| 302 #restrict to user specified comparisons | |
| 303 restrictedComparisons=c() | |
| 304 if(!is.null(opt$comparisonNameLow))restrictedComparisons=unique(c(restrictedComparisons,unlist(strsplit(opt$comparisonNameLow,",")))) | |
| 305 if(!is.null(opt$comparisonNameHigh))restrictedComparisons=unique(c(restrictedComparisons,unlist(strsplit(opt$comparisonNameHigh,",")))) | |
| 306 | |
| 307 if (!all(restrictedComparisons%in%colnames(comparisonMatrix))){ | |
| 308 addComment("[ERROR]Selected columns in filtering file are not present in filtering matrix!",T,opt$log,display=FALSE) | |
| 309 q( "no", 1, F ) | |
| 310 } | |
| 311 comparisonMatrix=matrix(comparisonMatrix[,restrictedComparisons],ncol=length(restrictedComparisons),dimnames = list(rownames(comparisonMatrix),restrictedComparisons)) | |
| 312 | |
| 313 #get number of required comparisons | |
| 314 nbComparisons=ncol(comparisonMatrix) | |
| 315 | |
| 316 addComment(c("[INFO]Dim of effective filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) | |
| 317 } | |
| 318 | |
| 319 | |
| 320 | |
| 321 factorInfoMatrix=NULL | |
| 322 if(!is.null(opt$factorInfo)){ | |
| 323 #get group information | |
| 324 #load factors file | |
| 325 factorInfoMatrix=read.csv(file=opt$factorInfo,header=F,sep="\t",colClasses="character") | |
| 326 #remove first row to convert it as colnames | |
| 327 colnames(factorInfoMatrix)=factorInfoMatrix[1,] | |
| 328 factorInfoMatrix=factorInfoMatrix[-1,] | |
| 329 #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case | |
| 330 rownames(factorInfoMatrix)=factorInfoMatrix[,1] | |
| 331 | |
| 332 factorBarColor=colnames(factorInfoMatrix)[2] | |
| 333 | |
| 334 if(ncol(factorInfoMatrix)>2){ | |
| 335 addComment("[ERROR]Factors file should not contain more than 2 columns",T,opt$log,display=FALSE) | |
| 336 q( "no", 1, F ) | |
| 337 } | |
| 338 | |
| 339 #factor file is used for color band on heatmap, so all expression matrix column should be in the factor file | |
| 340 if(expressionToCluster && length(setdiff(colnames(expressionMatrix),rownames(factorInfoMatrix)))!=0){ | |
| 341 addComment("[ERROR]Missing samples in factor file",T,opt$log,display=FALSE) | |
| 342 q( "no", 1, F ) | |
| 343 } | |
| 344 | |
| 345 #factor file is used for color band on heatmap, so all comparison matrix column should be in the factor file | |
| 346 if(!expressionToCluster && length(setdiff(colnames(comparisonMatrix),rownames(factorInfoMatrix)))!=0){ | |
| 347 addComment("[ERROR]Missing differential contrasts in factor file",T,opt$log,display=FALSE) | |
| 348 q( "no", 1, F ) | |
| 349 } | |
| 350 | |
| 351 addComment("[INFO]Factors OK",T,opt$log,display=FALSE) | |
| 352 addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),T,opt$log,display=FALSE) | |
| 353 } | |
| 354 | |
| 355 if(!is.null(opt$personalColors)){ | |
| 356 ##parse personal colors | |
| 357 personalColors=unlist(strsplit(opt$personalColors,",")) | |
| 358 if(length(personalColors)==2){ | |
| 359 ##add medium color between two to get three colors | |
| 360 personalColors=c(personalColors[1],paste(c("#",as.character(as.hexmode(floor(apply(col2rgb(personalColors),1,mean))))),collapse=""),personalColors[2]) | |
| 361 } | |
| 362 if(length(personalColors)!=3){ | |
| 363 addComment("[ERROR]Personalized colors doesn't contain enough colors",T,opt$log,display=FALSE) | |
| 364 q( "no", 1, F ) | |
| 365 } | |
| 366 | |
| 367 } | |
| 368 | |
| 369 | |
| 370 if(!is.null(opt$filterInputOutput) && opt$filterInputOutput=="input"){ | |
| 371 #filter input data | |
| 372 | |
| 373 if(is.null(opt$geneListFiltering)){ | |
| 374 #filtering using stat thresholds | |
| 375 #rowToKeep=intersect(which(comparisonMatrix[,seq(2,ncol(comparisonMatrix),4)]<=opt$pvalThreshold),which(abs(comparisonMatrix[,seq(4,ncol(comparisonMatrix),4)])>=log2(opt$FCthreshold))) | |
| 376 if(is.null(opt$genericData)){ | |
| 377 #diff. expression matrix | |
| 378 rowToKeep=names(which(unlist(apply(comparisonMatrix,1,function(x)length(intersect(which(x[seq(2,length(x),nbColPerContrast)]<opt$pvalThreshold),which(abs(x[seq(4,length(x),nbColPerContrast)])>log2(opt$FCthreshold))))!=0)))) | |
| 379 }else{ | |
| 380 #generic filtering matrix | |
| 381 rowToKeep=rownames(comparisonMatrix) | |
| 382 if(!is.null(opt$comparisonNameLow)){ | |
| 383 restrictedLowComparisons=unlist(strsplit(opt$comparisonNameLow,",")) | |
| 384 rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedLowComparisons]>opt$FCthreshold))!=0))))) | |
| 385 } | |
| 386 if(!is.null(opt$comparisonNameHigh)){ | |
| 387 restrictedHighComparisons=unlist(strsplit(opt$comparisonNameHigh,",")) | |
| 388 rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedHighComparisons]<opt$pvalThreshold))!=0))))) | |
| 389 } | |
| 390 } | |
| 391 }else{ | |
| 392 #filtering using user gene list | |
| 393 geneListFiltering=read.csv(opt$geneListFiltering,as.is = 1,header=F) | |
| 394 rowToKeep=unlist(c(geneListFiltering)) | |
| 395 } | |
| 396 | |
| 397 if(!is.null(comparisonMatrix) && !all(rowToKeep%in%rownames(comparisonMatrix))){ | |
| 398 #should arrive only with user gene list filtering with diff.exp. results clustering | |
| 399 addComment("[WARNING] some genes of the user defined list are not in the diff. exp. input file",T,opt$log) | |
| 400 rowToKeep=intersect(rowToKeep,rownames(comparisonMatrix)) | |
| 401 } | |
| 402 | |
| 403 if(expressionToCluster && !all(rowToKeep%in%rownames(expressionMatrix))){ | |
| 404 addComment("[WARNING] some genes selected by the input filter are not in the expression file",T,opt$log) | |
| 405 rowToKeep=intersect(rowToKeep,rownames(expressionMatrix)) | |
| 406 } | |
| 407 | |
| 408 if(length(rowToKeep)==0){ | |
| 409 addComment("[ERROR]No gene survived to the input filtering thresholds, execution will be aborted. | |
| 410 Please consider to change threshold values and re-run the tool.",T,opt$log) | |
| 411 q( "no", 1, F ) | |
| 412 } | |
| 413 | |
| 414 #filter comparison matrix | |
| 415 if(!is.null(comparisonMatrix)){ | |
| 416 comparisonMatrix=matrix(comparisonMatrix[rowToKeep,],ncol=ncol(comparisonMatrix),dimnames = list(rowToKeep,colnames(comparisonMatrix))) | |
| 417 if(!is.null(comparisonMatrixInfoGene))comparisonMatrixInfoGene=comparisonMatrixInfoGene[rowToKeep] | |
| 418 } | |
| 419 #then expression matrix | |
| 420 if(expressionToCluster)expressionMatrix=matrix(expressionMatrix[rowToKeep,],ncol=ncol(expressionMatrix),dimnames = list(rowToKeep,colnames(expressionMatrix))) | |
| 421 | |
| 422 if(!is.null(comparisonMatrix) && expressionToCluster && nrow(comparisonMatrix)!=nrow(expressionMatrix)){ | |
| 423 addComment("[ERROR]Problem during input filtering, please check code",T,opt$log,display=FALSE) | |
| 424 q( "no", 1, F ) | |
| 425 } | |
| 426 | |
| 427 addComment("[INFO]Filtering step done",T,opt$log,display=FALSE) | |
| 428 addComment(c("[INFO]Input filtering step:",length(rowToKeep),"remaining rows"),T,opt$log,display=FALSE) | |
| 429 } | |
| 430 | |
| 431 | |
| 432 addComment("[INFO]Ready to plot",T,opt$log,display=FALSE) | |
| 433 | |
| 434 ##--------------------- | |
| 435 | |
| 436 #plot heatmap | |
| 437 if(expressionToCluster){ | |
| 438 #will make clustering based on expression value or generic value | |
| 439 dataToHeatMap=expressionMatrix | |
| 440 valueMeaning="Intensity" | |
| 441 if(!is.null(opt$genericData))valueMeaning="Value" | |
| 442 }else{ | |
| 443 #will make clustering on log2(FC) values | |
| 444 dataToHeatMap=matrix(comparisonMatrix[,seq(4,ncol(comparisonMatrix),nbColPerContrast)],ncol=nbComparisons,dimnames = list(rownames(comparisonMatrix),colnames(comparisonMatrix)[seq(1,ncol(comparisonMatrix),nbColPerContrast)])) | |
| 445 valueMeaning="Log2(FC)" | |
| 446 } | |
| 447 addComment(c("[INFO]Dim of heatmap matrix:",dim(dataToHeatMap)),T,opt$log,display=FALSE) | |
| 448 | |
| 449 if(nrow(dataToHeatMap)==1 && ncol(dataToHeatMap)==1){ | |
| 450 addComment("[ERROR]Cannot make clustering with unique cell tab",T,opt$log,display=FALSE) | |
| 451 q( "no", 1, F ) | |
| 452 } | |
| 453 | |
| 454 | |
| 455 #apply data transformation if needed | |
| 456 if(opt$dataTransformation=="log"){ | |
| 457 dataToHeatMap=log(dataToHeatMap) | |
| 458 valueMeaning=paste(c("log(",valueMeaning,")"),collapse="") | |
| 459 addComment("[INFO]Data to cluster and to display in the heatmap are log transformed",T,opt$log,display=FALSE) | |
| 460 } | |
| 461 if(opt$dataTransformation=="log2"){ | |
| 462 dataToHeatMap=log2(dataToHeatMap) | |
| 463 valueMeaning=paste(c("log2(",valueMeaning,")"),collapse="") | |
| 464 addComment("[INFO]Data to cluster and to display in the heatmap are log2 transformed",T,opt$log,display=FALSE) | |
| 465 } | |
| 466 | |
| 467 maxRowsToDisplay=opt$maxRows | |
| 468 | |
| 469 nbClusters=opt$clusterNumber | |
| 470 if(nbClusters>nrow(dataToHeatMap)){ | |
| 471 #correct number of clusters if needed | |
| 472 nbClusters=nrow(dataToHeatMap) | |
| 473 addComment(c("[WARNING]Not enough rows to reach required clusters number, it is reduced to number of rows:",nbClusters),T,opt$log,display=FALSE) | |
| 474 } | |
| 475 | |
| 476 nbSampleClusters=opt$sampleClusterNumber | |
| 477 if(nbSampleClusters>ncol(dataToHeatMap)){ | |
| 478 #correct number of clusters if needed | |
| 479 nbSampleClusters=ncol(dataToHeatMap) | |
| 480 addComment(c("[WARNING]Not enough columns to reach required conditions clusters number, it is reduced to number of columns:",nbSampleClusters),T,opt$log,display=FALSE) | |
| 481 } | |
| 482 | |
| 483 colClust=FALSE | |
| 484 rowClust=FALSE | |
| 485 effectiveRowClust=FALSE | |
| 486 | |
| 487 #make appropriate clustering if needed | |
| 488 if(nrow(dataToHeatMap)>1 && nbClusters>1)rowClust=hclust(distExtended(dataToHeatMap,method = opt$distanceMeasure),method = opt$aggloMethod) | |
| 489 if(ncol(dataToHeatMap)>1 && nbSampleClusters>1)colClust=hclust(distExtended(t(dataToHeatMap),method = opt$distanceMeasure),method = opt$aggloMethod) | |
| 490 | |
| 491 if(nrow(dataToHeatMap)>maxRowsToDisplay){ | |
| 492 #make subsampling based on preliminary global clustering | |
| 493 #clusteringResults=cutree(rowClust,nbClusters) | |
| 494 #heatMapGenesToKeep=unlist(lapply(seq(1,nbClusters),function(x)sample(which(clusteringResults==x),min(length(which(clusteringResults==x)),round(maxRowsToDisplay/nbClusters))))) | |
| 495 ##OR | |
| 496 #basic subsampling | |
| 497 heatMapGenesToKeep=sample(rownames(dataToHeatMap),maxRowsToDisplay) | |
| 498 effectiveDataToHeatMap=matrix(dataToHeatMap[heatMapGenesToKeep,],ncol=ncol(dataToHeatMap),dimnames=list(heatMapGenesToKeep,colnames(dataToHeatMap))) | |
| 499 effectiveNbClusters=min(nbClusters,maxRowsToDisplay) | |
| 500 if(nrow(effectiveDataToHeatMap)>1 && effectiveNbClusters>1)effectiveRowClust=hclust(distExtended(effectiveDataToHeatMap, method = opt$distanceMeasure),method = opt$aggloMethod) | |
| 501 addComment(c("[WARNING]Too many rows for efficient heatmap drawing",maxRowsToDisplay,"subsampling is done for vizualization only"),T,opt$log,display=FALSE) | |
| 502 rm(heatMapGenesToKeep) | |
| 503 }else{ | |
| 504 effectiveDataToHeatMap=dataToHeatMap | |
| 505 effectiveRowClust=rowClust | |
| 506 effectiveNbClusters=nbClusters | |
| 507 } | |
| 508 | |
| 509 addComment(c("[INFO]Dim of plotted heatmap matrix:",dim(effectiveDataToHeatMap)),T,opt$log,display=FALSE) | |
| 510 | |
| 511 personalized_hoverinfo=matrix("",ncol = ncol(effectiveDataToHeatMap),nrow = nrow(effectiveDataToHeatMap),dimnames = dimnames(effectiveDataToHeatMap)) | |
| 512 if(expressionToCluster){ | |
| 513 for(iCol in colnames(effectiveDataToHeatMap)){for(iRow in rownames(effectiveDataToHeatMap)){personalized_hoverinfo[iRow,iCol]=paste(c("Probe: ",iRow,"\nCondition: ",iCol,"\n",valueMeaning,": ",effectiveDataToHeatMap[iRow,iCol]),collapse="")}} | |
| 514 }else{ | |
| 515 for(iCol in colnames(effectiveDataToHeatMap)){for(iRow in rownames(effectiveDataToHeatMap)){personalized_hoverinfo[iRow,iCol]=paste(c("Probe: ",iRow,"\nCondition: ",iCol,"\nFC: ",round(2^effectiveDataToHeatMap[iRow,iCol],2)),collapse="")}} | |
| 516 } | |
| 517 | |
| 518 #trying to overcome limitation of heatmaply package to modify xtick and ytick label, using directly plotly functions, but for now plotly do not permit to have personalized color for each x/y tick separately | |
| 519 test=FALSE | |
| 520 if(test==TRUE){ | |
| 521 | |
| 522 #define dendogram shapes | |
| 523 dd.row <- as.dendrogram(effectiveRowClust) | |
| 524 dd.col <- as.dendrogram(colClust) | |
| 525 | |
| 526 #and color them | |
| 527 dd.row=color_branches(dd.row, k = effectiveNbClusters, groupLabels = T) | |
| 528 dd.col=color_branches(dd.col, k = nbSampleClusters, groupLabels = T) | |
| 529 | |
| 530 #generating function for dendogram from segment list | |
| 531 ggdend <- function(df) { | |
| 532 ggplot() + | |
| 533 geom_segment(data = df, aes(x=x, y=y, xend=xend, yend=yend)) + | |
| 534 labs(x = "", y = "") + theme_minimal() + | |
| 535 theme(axis.text = element_blank(), axis.ticks = element_blank(), | |
| 536 panel.grid = element_blank()) | |
| 537 } | |
| 538 | |
| 539 # generate x/y dendogram plots | |
| 540 px <- ggdend(dendro_data(dd.col)$segments) | |
| 541 py <- ggdend(dendro_data(dd.row)$segments) + coord_flip() | |
| 542 | |
| 543 # reshape data matrix | |
| 544 col.ord <- order.dendrogram(dd.col) | |
| 545 row.ord <- order.dendrogram(dd.row) | |
| 546 xx <- effectiveDataToHeatMap[row.ord, col.ord] | |
| 547 # and also personalized_hoverinfo | |
| 548 personalized_hoverinfo=personalized_hoverinfo[row.ord, col.ord] | |
| 549 | |
| 550 # hide axis ticks and grid lines | |
| 551 eaxis <- list( | |
| 552 showticklabels = FALSE, | |
| 553 showgrid = FALSE, | |
| 554 zeroline = FALSE | |
| 555 ) | |
| 556 | |
| 557 #make the empty plot | |
| 558 p_empty <- plot_ly() %>% | |
| 559 layout(margin = list(l = 200), | |
| 560 xaxis = eaxis, | |
| 561 yaxis = eaxis) | |
| 562 | |
| 563 heatmap.plotly <- plot_ly( | |
| 564 z = xx, x = 1:ncol(xx), y = 1:nrow(xx), colors = viridis(n = 101, alpha = 1, begin = 0, end = 1, option = "inferno"), | |
| 565 type = "heatmap", showlegend = FALSE, text = personalized_hoverinfo, hoverinfo = "text", | |
| 566 colorbar = list( | |
| 567 # Capitalise first letter | |
| 568 title = valueMeaning, | |
| 569 tickmode = "array", | |
| 570 len = 0.3 | |
| 571 ) | |
| 572 ) %>% | |
| 573 layout( | |
| 574 xaxis = list( | |
| 575 tickfont = list(size = 10,color=get_leaves_branches_col(dd.row)), | |
| 576 tickangle = 45, | |
| 577 tickvals = 1:ncol(xx), ticktext = colnames(xx), | |
| 578 linecolor = "#ffffff", | |
| 579 range = c(0.5, ncol(xx) + 0.5), | |
| 580 showticklabels = TRUE | |
| 581 ), | |
| 582 yaxis = list( | |
| 583 tickfont = list(size = 10, color=get_leaves_branches_col(dd.col)), | |
| 584 tickangle = 0, | |
| 585 tickvals = 1:nrow(xx), ticktext = rownames(xx), | |
| 586 linecolor = "#ffffff", | |
| 587 range = c(0.5, nrow(xx) + 0.5), | |
| 588 showticklabels = TRUE | |
| 589 ) | |
| 590 ) | |
| 591 | |
| 592 #generate plotly | |
| 593 pp <- subplot(px, p_empty, heatmap.plotly, py, nrows = 2, margin = 0,widths = c(0.8,0.2),heights = c(0.2,0.8), shareX = TRUE, | |
| 594 shareY = TRUE) | |
| 595 | |
| 596 #save image file | |
| 597 export(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) | |
| 598 #rise a bug due to token stuf | |
| 599 #orca(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) | |
| 600 | |
| 601 | |
| 602 #save plotLy file | |
| 603 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/Heatmap.html"),collapse=""),selfcontained = F) | |
| 604 | |
| 605 #htmlwidgets::saveWidget(as_widget(pp),"~/Bureau/test.html",selfcontained = F) | |
| 606 | |
| 607 }else{ #test | |
| 608 label_names=c("Probe","Condition",valueMeaning) | |
| 609 | |
| 610 # #color hclust objects | |
| 611 # dd.row=color_branches(effectiveRowClust, k = effectiveNbClusters) | |
| 612 # #rowColors=get_leaves_branches_col(dd.row) | |
| 613 # #rowColors[order.dendrogram(dd.row)]=rowColors | |
| 614 # rowGroup=cutree(effectiveRowClust, k = effectiveNbClusters) | |
| 615 # | |
| 616 # #get order of class as they will be displayed on the dendogram | |
| 617 # rowGroupRenamed=data.frame(cluster=mapvalues(rowGroup, unique(rowGroup[order.dendrogram(dd.row)[nleaves(dd.row):1]]), 1:effectiveNbClusters)) | |
| 618 # | |
| 619 # dd.col=color_branches(colClust, k = nbSampleClusters) | |
| 620 # #colColors=get_leaves_branches_col(dd.col) | |
| 621 # #colColors[order.dendrogram(dd.col)]=colColors | |
| 622 # colGroup=cutree(colClust, k = nbSampleClusters) | |
| 623 # | |
| 624 # # #get order of class as they will be displayed on the dendogram | |
| 625 # colGroupRenamed=data.frame(sampleCluster=mapvalues(colGroup, unique(colGroup[order.dendrogram(dd.col)[nleaves(dd.col):1]]), 1:nbSampleClusters)) | |
| 626 | |
| 627 | |
| 628 #while option is not correctly managed by heatmap apply, put personalized_hoverinfo to NULL | |
| 629 personalized_hoverinfo=NULL | |
| 630 | |
| 631 if(is.null(opt$personalColors)){ | |
| 632 heatmapColors=viridis(n = 101, alpha = 1, begin = 0, end = 1, option = "inferno") | |
| 633 }else{ | |
| 634 heatmapColors=personalColors | |
| 635 } | |
| 636 | |
| 637 colGroupRenamed=NULL | |
| 638 if(!is.null(factorInfoMatrix)){ | |
| 639 colGroupRenamed=eval(parse(text=(paste("data.frame(",factorBarColor,"=factorInfoMatrix[colnames(effectiveDataToHeatMap),2])",sep="")))) | |
| 640 sideBarGroupNb=length(table(factorInfoMatrix[colnames(effectiveDataToHeatMap),2])) | |
| 641 sideBarColorPaletteName="Spectral" | |
| 642 if(!is.null(opt$sideBarColorPalette) && opt$sideBarColorPalette%in%rownames(RColorBrewer::brewer.pal.info)){ | |
| 643 sideBarColorPaletteName=opt$sideBarColorPalette | |
| 644 } | |
| 645 sideBarColorPalette=setNames(colorRampPalette(RColorBrewer::brewer.pal(RColorBrewer::brewer.pal.info[sideBarColorPaletteName,"maxcolors"], sideBarColorPaletteName))(sideBarGroupNb),unique(factorInfoMatrix[colnames(effectiveDataToHeatMap),2])) | |
| 646 } | |
| 647 | |
| 648 if(!is.null(colGroupRenamed)){ | |
| 649 pp <- heatmaply(effectiveDataToHeatMap,key.title = valueMeaning,k_row=effectiveNbClusters,k_col=nbSampleClusters,col_side_colors=colGroupRenamed,col_side_palette=sideBarColorPalette,Rowv=effectiveRowClust,Colv=colClust,label_names=label_names,custom_hovertext=personalized_hoverinfo,plot_method = "plotly",colors = heatmapColors) | |
| 650 }else{ | |
| 651 pp <- heatmaply(effectiveDataToHeatMap,key.title = valueMeaning,k_row=effectiveNbClusters,k_col=nbSampleClusters,Rowv=effectiveRowClust,Colv=colClust,label_names=label_names,custom_hovertext=personalized_hoverinfo,plot_method = "plotly",colors = heatmapColors) | |
| 652 } | |
| 653 | |
| 654 | |
| 655 #save image file | |
| 656 export(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) | |
| 657 #rise a bug due to token stuf | |
| 658 #orca(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) | |
| 659 | |
| 660 | |
| 661 #save plotLy file | |
| 662 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/Heatmap.html"),collapse=""),selfcontained = F) | |
| 663 | |
| 664 } | |
| 665 addComment("[INFO]Heatmap drawn",T,opt$log,display=FALSE) | |
| 666 | |
| 667 | |
| 668 #plot circular heatmap | |
| 669 if(!class(effectiveRowClust)=="logical"){ | |
| 670 dendo=as.dendrogram(effectiveRowClust) | |
| 671 | |
| 672 if(is.null(opt$personalColors)){ | |
| 673 col_fun = colorRamp2(quantile(effectiveDataToHeatMap,probs = seq(0,1,0.01)), viridis(101,option = "inferno")) | |
| 674 }else{ | |
| 675 col_fun = colorRamp2(quantile(effectiveDataToHeatMap,probs = seq(0,1,0.5)), personalColors) | |
| 676 } | |
| 677 | |
| 678 if(opt$format=="pdf"){ | |
| 679 pdf(paste(c("./plotDir/circularPlot.pdf"),collapse=""))}else{ | |
| 680 png(paste(c("./plotDir/circularPlot.png"),collapse="")) | |
| 681 } | |
| 682 | |
| 683 circos.par(cell.padding = c(0, 0, 0, 0), gap.degree = 5) | |
| 684 circos.initialize(c(rep("a",nrow(effectiveDataToHeatMap)),"b"),xlim=cbind(c(0,0),c(nrow(effectiveDataToHeatMap),5))) | |
| 685 circos.track(ylim = c(0, 1), bg.border = NA, panel.fun = function(x, y) { | |
| 686 if(CELL_META$sector.index=="a"){ | |
| 687 nr = ncol(effectiveDataToHeatMap) | |
| 688 nc = nrow(effectiveDataToHeatMap) | |
| 689 circos.text(1:nc- 0.5, rep(0,nc), adj = c(0, 0), | |
| 690 rownames(effectiveDataToHeatMap)[order.dendrogram(dendo)], facing = "clockwise", niceFacing = TRUE, cex = 0.3) | |
| 691 } | |
| 692 }) | |
| 693 | |
| 694 circos.track(ylim = c(0, ncol(effectiveDataToHeatMap)), bg.border = NA, panel.fun = function(x, y) { | |
| 695 | |
| 696 m = t(matrix(effectiveDataToHeatMap[order.dendrogram(dendo),],ncol=ncol(effectiveDataToHeatMap))) | |
| 697 col_mat = col_fun(m) | |
| 698 nr = nrow(m) | |
| 699 nc = ncol(m) | |
| 700 if(CELL_META$sector.index=="a"){ | |
| 701 for(i in 1:nr) { | |
| 702 circos.rect(1:nc - 1, rep(nr - i, nc), | |
| 703 1:nc, rep(nr - i + 1, nc), | |
| 704 border = col_mat[i, ], col = col_mat[i, ]) | |
| 705 } | |
| 706 }else{ | |
| 707 circos.text(rep(1,nr), seq(nr,1,-1) , colnames(effectiveDataToHeatMap),cex = 0.3) | |
| 708 } | |
| 709 }) | |
| 710 | |
| 711 #dendo = color_branches(dendo, k = effectiveNbClusters, col = colorRampPalette(brewer.pal(12,"Set3"))(effectiveNbClusters)) | |
| 712 dendo = color_branches(dendo, k = effectiveNbClusters, col = rev(colorspace::rainbow_hcl(effectiveNbClusters))) | |
| 713 | |
| 714 | |
| 715 circos.track(ylim = c(0, attributes(dendo)$height), bg.border = NA, track.height = 0.25, | |
| 716 panel.fun = function(x, y) { | |
| 717 if(CELL_META$sector.index=="a")circos.dendrogram(dendo)} ) | |
| 718 | |
| 719 circos.clear() | |
| 720 ##add legend | |
| 721 lgd_links = Legend(at = seq(ceiling(min(effectiveDataToHeatMap)),floor(max(effectiveDataToHeatMap)),ceiling((floor(max(effectiveDataToHeatMap))-ceiling(min(effectiveDataToHeatMap)))/4)), col_fun = col_fun, | |
| 722 title_position = "topleft", grid_width = unit(5, "mm") ,title = valueMeaning) | |
| 723 | |
| 724 pushViewport(viewport(x = 0.85, y = 0.80, | |
| 725 width = 0.1, | |
| 726 height = 0.1, | |
| 727 just = c("left", "bottom"))) | |
| 728 grid.draw(lgd_links) | |
| 729 upViewport() | |
| 730 | |
| 731 | |
| 732 dev.off() | |
| 733 | |
| 734 addComment("[INFO]Circular heatmap drawn",T,opt$log,display=FALSE) | |
| 735 loc <- Sys.setlocale("LC_NUMERIC","C") | |
| 736 }else{ | |
| 737 addComment(c("[WARNING]Circular plot will not be plotted considering row or cluster number < 2"),T,opt$log,display=FALSE) | |
| 738 } | |
| 739 rm(effectiveDataToHeatMap,effectiveRowClust,effectiveNbClusters) | |
| 740 | |
| 741 #plot screeplot | |
| 742 if(class(rowClust)!="logical" && nrow(dataToHeatMap)>2){ | |
| 743 screePlotData=c() | |
| 744 for(iNbClusters in 2:(nbClusters+min(10,max(0,nrow(dataToHeatMap)-nbClusters)))){ | |
| 745 clusteringResults=cutree(rowClust,iNbClusters) | |
| 746 #clusteringResults=kmeans(dataToHeatMap,iNbClusters)$cluster | |
| 747 | |
| 748 #compute variance between each intra-class points amongst themselves (need at least 3 points by cluster) | |
| 749 #screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>2){var(dist(dataToHeatMap[temp,]))}else{0}}))) ) | |
| 750 #compute variance between each intra-class points and fictive mean point (need at least 2 points by cluster) | |
| 751 #screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){ var(dist(rbind(apply(dataToHeatMap[temp,],2,mean),dataToHeatMap[temp,]))[1:length(temp)]) }else{0}}))) ) | |
| 752 if(ncol(dataToHeatMap)>1)screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){ sum((distExtended(rbind(apply(dataToHeatMap[temp,],2,mean),dataToHeatMap[temp,]),method = opt$distanceMeasure)[1:length(temp)])^2) }else{0}}))) ) | |
| 753 else screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){ sum((dataToHeatMap[temp,]-mean(dataToHeatMap[temp,]))^2) }else{0}}))) ) | |
| 754 } | |
| 755 | |
| 756 dataToPlot=data.frame(clusterNb=seq(2,length(screePlotData)+1),wcss=screePlotData) | |
| 757 p <- ggplot(data=dataToPlot, aes(clusterNb,wcss)) + geom_point(colour="#EE4444") + geom_line(colour="#DD9999") + | |
| 758 ggtitle("Scree plot") + theme_bw() + xlab(label="Cluster number") + ylab(label="Within cluster sum of squares") + | |
| 759 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position = "none") + | |
| 760 scale_x_continuous(breaks=seq(min(dataToPlot$clusterNb), max(dataToPlot$clusterNb), 1)) | |
| 761 | |
| 762 #save plotly files | |
| 763 pp <- ggplotly(p) | |
| 764 | |
| 765 if(opt$format=="pdf"){ | |
| 766 pdf(paste(c("./plotDir/screePlot.pdf"),collapse=""))}else{ | |
| 767 png(paste(c("./plotDir/screePlot.png"),collapse="")) | |
| 768 } | |
| 769 plot(p) | |
| 770 dev.off() | |
| 771 | |
| 772 #save plotly files | |
| 773 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/screePlot.html"),collapse=""),selfcontained = F) | |
| 774 | |
| 775 addComment("[INFO]Scree plot drawn",T,opt$log,display=FALSE) | |
| 776 }else{ | |
| 777 addComment(c("[WARNING]Scree plot will not be plotted considering row number <= 2"),T,opt$log,display=FALSE) | |
| 778 } | |
| 779 | |
| 780 ##---------------------- | |
| 781 | |
| 782 #filter output based on parameters | |
| 783 | |
| 784 rowToKeep=rownames(dataToHeatMap) | |
| 785 if(!is.null(opt$filterInputOutput) && opt$filterInputOutput=="output"){ | |
| 786 #rowToKeep=intersect(which(comparisonMatrix[,seq(2,ncol(comparisonMatrix),4)]<=opt$pvalThreshold),which(abs(comparisonMatrix[,seq(4,ncol(comparisonMatrix),4)])>=log2(opt$FCthreshold))) | |
| 787 if(is.null(opt$geneListFiltering)){ | |
| 788 if(is.null(opt$genericData)){ | |
| 789 #diff. expression matrix | |
| 790 rowToKeep=names(which(unlist(apply(comparisonMatrix,1,function(x)length(intersect(which(x[seq(2,length(x),nbColPerContrast)]<=opt$pvalThreshold),which(abs(x[seq(4,length(x),nbColPerContrast)])>=log2(opt$FCthreshold))))!=0)))) | |
| 791 }else{ | |
| 792 #generic filtering matrix | |
| 793 rowToKeep=rownames(comparisonMatrix) | |
| 794 if(!is.null(opt$comparisonNameLow)){ | |
| 795 restrictedLowComparisons=unlist(strsplit(opt$comparisonNameLow,",")) | |
| 796 rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedLowComparisons]>opt$FCthreshold))!=0))))) | |
| 797 } | |
| 798 if(!is.null(opt$comparisonNameHigh)){ | |
| 799 restrictedHighComparisons=unlist(strsplit(opt$comparisonNameHigh,",")) | |
| 800 rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedHighComparisons]<opt$pvalThreshold))!=0))))) | |
| 801 } | |
| 802 } | |
| 803 }else{ | |
| 804 geneListFiltering=read.csv(opt$geneListFiltering,as.is = 1,header=F) | |
| 805 rowToKeep=unlist(c(geneListFiltering)) | |
| 806 } | |
| 807 if(!is.null(comparisonMatrix) && !all(rowToKeep%in%rownames(comparisonMatrix))){ | |
| 808 #should arrive only with user gene list filtering with diff.exp. results clustering | |
| 809 addComment("[WARNING] some genes of the user defined list are not in the diff. exp. input file",T,opt$log) | |
| 810 rowToKeep=intersect(rowToKeep,rownames(comparisonMatrix)) | |
| 811 } | |
| 812 | |
| 813 if(expressionToCluster && !all(rowToKeep%in%rownames(expressionMatrix))){ | |
| 814 addComment("[WARNING] some genes selected by the output filter are not in the expression file",T,opt$log) | |
| 815 rowToKeep=intersect(rowToKeep,rownames(expressionMatrix)) | |
| 816 } | |
| 817 addComment(c("[INFO]Output filtering step:",length(rowToKeep),"remaining rows"),T,opt$log,display=FALSE) | |
| 818 } | |
| 819 | |
| 820 #we add differential analysis info in output if it was directly used for clustering or when it was used for filtering with expression | |
| 821 | |
| 822 #in case of expression or generic data clustering without filtering based on external stats | |
| 823 if(expressionToCluster && is.null(comparisonMatrix)){ | |
| 824 if(length(rowToKeep)==0){ | |
| 825 addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE) | |
| 826 outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE) | |
| 827 }else{ | |
| 828 outputData=matrix(0,ncol=2,nrow=length(rowToKeep)+1) | |
| 829 outputData[1,]=c("Gene","Cluster") | |
| 830 outputData[2:(length(rowToKeep)+1),1]=rowToKeep | |
| 831 if(class(rowClust)!="logical" ){ | |
| 832 outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep] | |
| 833 }else{ | |
| 834 outputData[2:(length(rowToKeep)+1),2]=0 | |
| 835 } | |
| 836 } | |
| 837 } | |
| 838 | |
| 839 #in case of generic data clustering with filtering based on generic external data | |
| 840 if(!is.null(opt$genericData) && !is.null(comparisonMatrix)){ | |
| 841 if(length(rowToKeep)==0){ | |
| 842 addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE) | |
| 843 outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE) | |
| 844 }else{ | |
| 845 outputData=matrix(0,ncol=2+nbComparisons,nrow=length(rowToKeep)+1) | |
| 846 outputData[1,]=c("Gene","Cluster",colnames(comparisonMatrix)) | |
| 847 outputData[2:(length(rowToKeep)+1),1]=rowToKeep | |
| 848 if(class(rowClust)!="logical" ){ | |
| 849 outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep] | |
| 850 }else{ | |
| 851 outputData[2:(length(rowToKeep)+1),2]=0 | |
| 852 } | |
| 853 outputData[2:(length(rowToKeep)+1),3:(ncol(comparisonMatrix)+2)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4) | |
| 854 } | |
| 855 } | |
| 856 | |
| 857 #in case of expression data clustering with filtering based on diff. exp. results or diff. exp. results clustering | |
| 858 if(is.null(opt$genericData) && !is.null(comparisonMatrix)){ | |
| 859 if(length(rowToKeep)==0){ | |
| 860 addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE) | |
| 861 outputData=matrix(0,ncol=3,nrow=3) | |
| 862 outputData[1,]=c("","","Comparison") | |
| 863 outputData[2,]=c("Gene","Info","Cluster") | |
| 864 outputData[3,]=c("noGene","noInfo","noClustering") | |
| 865 }else{ | |
| 866 outputData=matrix(0,ncol=3+nbComparisons*nbColPerContrast,nrow=length(rowToKeep)+2) | |
| 867 outputData[1,]=c("","","Comparison",rep(colnames(comparisonMatrix)[seq(1,ncol(comparisonMatrix),nbColPerContrast)],each=nbColPerContrast)) | |
| 868 outputData[2,]=c("Gene","Info","Cluster",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),nbComparisons)) | |
| 869 outputData[3:(length(rowToKeep)+2),1]=rowToKeep | |
| 870 outputData[3:(length(rowToKeep)+2),2]=comparisonMatrixInfoGene[rowToKeep] | |
| 871 if(class(rowClust)!="logical" ){ | |
| 872 outputData[3:(length(rowToKeep)+2),3]=cutree(rowClust,nbClusters)[rowToKeep] | |
| 873 }else{ | |
| 874 outputData[3:(length(rowToKeep)+2),3]=0 | |
| 875 } | |
| 876 outputData[3:(length(rowToKeep)+2),4:(ncol(comparisonMatrix)+3)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4) | |
| 877 } | |
| 878 } | |
| 879 | |
| 880 addComment("[INFO]Formated output",T,opt$log,display=FALSE) | |
| 881 write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\t",col.names = F,row.names = F) | |
| 882 | |
| 883 ##---------------------- | |
| 884 | |
| 885 end.time <- Sys.time() | |
| 886 addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE) | |
| 887 | |
| 888 | |
| 889 addComment("[INFO]End of R script",T,opt$log,display=FALSE) | |
| 890 | |
| 891 printSessionInfo(opt$log) | |
| 892 | |
| 893 #sessionInfo() | |
| 894 | |
| 895 | |
| 896 | 
