Mercurial > repos > vandelj > giant_aptsummarize
comparison src/ExprPlotsScript.R @ 0:708f43bda2b6 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
| author | vandelj | 
|---|---|
| date | Fri, 26 Jun 2020 09:35:11 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:708f43bda2b6 | 
|---|---|
| 1 # A command-line interface to basic plots for use with Galaxy | |
| 2 # written by Jimmy Vandel | |
| 3 # one of these arguments is required: | |
| 4 # | |
| 5 # | |
| 6 initial.options <- commandArgs(trailingOnly = FALSE) | |
| 7 file.arg.name <- "--file=" | |
| 8 script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)]) | |
| 9 script.basename <- dirname(script.name) | |
| 10 source(file.path(script.basename, "utils.R")) | |
| 11 source(file.path(script.basename, "getopt.R")) | |
| 12 | |
| 13 #addComment("Welcome R!") | |
| 14 | |
| 15 # setup R error handling to go to stderr | |
| 16 options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
| 17 | |
| 18 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
| 19 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
| 20 loc <- Sys.setlocale("LC_NUMERIC", "C") | |
| 21 | |
| 22 #get starting time | |
| 23 start.time <- Sys.time() | |
| 24 | |
| 25 #get options | |
| 26 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) | |
| 27 args <- commandArgs() | |
| 28 | |
| 29 | |
| 30 # get options, using the spec as defined by the enclosed list. | |
| 31 # we read the options from the default: commandArgs(TRUE). | |
| 32 spec <- matrix(c( | |
| 33 "dataFile", "i", 1, "character", | |
| 34 "factorInfo","t", 1, "character", | |
| 35 "dataFileFormat","j",1,"character", | |
| 36 "conditionNames","c",1,"character", | |
| 37 "format", "f", 1, "character", | |
| 38 "quiet", "q", 0, "logical", | |
| 39 "log", "l", 1, "character", | |
| 40 "histo" , "h", 1, "character", | |
| 41 "maPlot" , "a", 1, "character", | |
| 42 "boxplot" , "b", 1, "character", | |
| 43 "microarray" , "m", 1, "character", | |
| 44 "acp" , "p" , 1, "character", | |
| 45 "screePlot" , "s" , 1, "character"), | |
| 46 byrow=TRUE, ncol=4) | |
| 47 opt <- getopt(spec) | |
| 48 | |
| 49 # enforce the following required arguments | |
| 50 if (is.null(opt$log)) { | |
| 51 addComment("[ERROR]'log file' is required") | |
| 52 q( "no", 1, F ) | |
| 53 } | |
| 54 addComment("[INFO]Start of R script",T,opt$log,display=FALSE) | |
| 55 if (is.null(opt$dataFile) || is.null(opt$dataFileFormat)) { | |
| 56 addComment("[ERROR]'dataFile' and it format are required",T,opt$log) | |
| 57 q( "no", 1, F ) | |
| 58 } | |
| 59 if (is.null(opt$format)) { | |
| 60 addComment("[ERROR]'output format' is required",T,opt$log) | |
| 61 q( "no", 1, F ) | |
| 62 } | |
| 63 if (is.null(opt$histo) & is.null(opt$maPlot) & is.null(opt$boxplot) & is.null(opt$microarray) & is.null(opt$acp)){ | |
| 64 addComment("[ERROR]Select at least one plot to draw",T,opt$log) | |
| 65 q( "no", 1, F ) | |
| 66 } | |
| 67 | |
| 68 verbose <- if (is.null(opt$quiet)) { | |
| 69 TRUE | |
| 70 }else{ | |
| 71 FALSE} | |
| 72 | |
| 73 addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE) | |
| 74 | |
| 75 addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE) | |
| 76 addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE) | |
| 77 | |
| 78 #directory for plots | |
| 79 dir.create(file.path(getwd(), "plotDir")) | |
| 80 dir.create(file.path(getwd(), "plotLyDir")) | |
| 81 | |
| 82 #silent package loading | |
| 83 suppressPackageStartupMessages({ | |
| 84 library("oligo") | |
| 85 library("ff") | |
| 86 library("ggplot2") | |
| 87 library("plotly") | |
| 88 }) | |
| 89 | |
| 90 | |
| 91 #chargement des fichiers en entrée | |
| 92 #fichier de type CEL | |
| 93 dataAreFromCel=FALSE | |
| 94 if(toupper(opt$dataFileFormat)=="CEL"){ | |
| 95 dataAreFromCel=TRUE | |
| 96 celData=read.celfiles(unlist(strsplit(opt$dataFile,","))) | |
| 97 #load all expressions | |
| 98 dataMatrix=exprs(celData) | |
| 99 #select "pm" probes | |
| 100 probeInfo=getProbeInfo(celData,probeType = c("pm"),target="probeset") | |
| 101 #reduce dataMatrix to log expression matrix for a randomly probe selection | |
| 102 dataMatrix=log2(dataMatrix[sample(unique(probeInfo[,1]),min(100000,length(unique(probeInfo[,1])))),]) | |
| 103 addComment("[INFO]Raw data are log2 transformed",TRUE,opt$log,display=FALSE) | |
| 104 remove(probeInfo) | |
| 105 }else{ | |
| 106 #fichier deja tabule | |
| 107 dataMatrix=read.csv(file=opt$dataFile,header=F,sep="\t",colClasses="character") | |
| 108 #remove first row to convert it as colnames (to avoid X before colnames with header=T) | |
| 109 colNamesData=dataMatrix[1,-1] | |
| 110 dataMatrix=dataMatrix[-1,] | |
| 111 #remove first colum to convert it as rownames | |
| 112 rowNamesData=dataMatrix[,1] | |
| 113 dataMatrix=dataMatrix[,-1] | |
| 114 if(is.data.frame(dataMatrix)){ | |
| 115 dataMatrix=data.matrix(dataMatrix) | |
| 116 }else{ | |
| 117 dataMatrix=data.matrix(as.numeric(dataMatrix)) | |
| 118 } | |
| 119 dimnames(dataMatrix)=list(rowNamesData,colNamesData) | |
| 120 if(any(duplicated(rowNamesData)))addComment("[WARNING] several rows share the same probe/gene name, you should merge or rename them to avoid further analysis mistakes",TRUE,opt$log,display=FALSE) | |
| 121 } | |
| 122 | |
| 123 addComment("[INFO]Input data loaded",TRUE,opt$log,display=FALSE) | |
| 124 addComment(c("[INFO]Dim of data matrix:",dim(dataMatrix)),T,opt$log,display=FALSE) | |
| 125 | |
| 126 #get number of conditions | |
| 127 nbConditions=ncol(dataMatrix) | |
| 128 | |
| 129 #get condition names if they are specified | |
| 130 if(!is.null(opt$conditionNames) && length(opt$conditionNames)==nbConditions){ | |
| 131 nameConditions=opt$conditionNames | |
| 132 colnames(dataMatrix)=nameConditions | |
| 133 #rownames(phenoData(celData)@data)=nameConditions | |
| 134 #rownames(protocolData(celData)@data)=nameConditions | |
| 135 }else{ | |
| 136 nameConditions=colnames(dataMatrix) | |
| 137 } | |
| 138 | |
| 139 #create a correspondance table between plot file names and name displayed in figure legend and html items | |
| 140 correspondanceNameTable=matrix("",ncol=2,nrow=nbConditions) | |
| 141 correspondanceNameTable[,1]=paste("Condition",1:nbConditions,sep="") | |
| 142 correspondanceNameTable[,2]=nameConditions | |
| 143 rownames(correspondanceNameTable)=correspondanceNameTable[,2] | |
| 144 | |
| 145 addComment("[INFO]Retreive condition names",TRUE,opt$log,display=FALSE) | |
| 146 | |
| 147 if(!is.null(opt$factorInfo)){ | |
| 148 #chargement du fichier des facteurs | |
| 149 factorInfoMatrix=read.csv(file=file.path(getwd(), opt$factorInfo),header=F,sep="\t",colClasses="character") | |
| 150 #remove first row to convert it as colnames | |
| 151 colnames(factorInfoMatrix)=factorInfoMatrix[1,] | |
| 152 factorInfoMatrix=factorInfoMatrix[-1,] | |
| 153 #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case | |
| 154 rownames(factorInfoMatrix)=factorInfoMatrix[,1] | |
| 155 | |
| 156 | |
| 157 if(length(setdiff(colnames(dataMatrix),rownames(factorInfoMatrix)))!=0){ | |
| 158 addComment("[ERROR]Missing samples in factor file",T,opt$log) | |
| 159 q( "no", 1, F ) | |
| 160 } | |
| 161 | |
| 162 #order sample as in expression matrix and remove spurious sample | |
| 163 factorInfoMatrix=factorInfoMatrix[colnames(dataMatrix),] | |
| 164 | |
| 165 addComment("[INFO]Factors OK",T,opt$log,display=FALSE) | |
| 166 addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),T,opt$log,display=FALSE) | |
| 167 | |
| 168 } | |
| 169 | |
| 170 addComment("[INFO]Ready to plot",T,opt$log,display=FALSE) | |
| 171 | |
| 172 | |
| 173 ##---------------------- | |
| 174 | |
| 175 ###plot histograms### | |
| 176 histogramPerFigure=50 | |
| 177 if (!is.null(opt$histo)) { | |
| 178 for(iToPlot in 1:(((nbConditions-1)%/%histogramPerFigure)+1)){ | |
| 179 firstPlot=1+histogramPerFigure*(iToPlot-1) | |
| 180 lastPlot=min(nbConditions,histogramPerFigure*iToPlot) | |
| 181 dataToPlot=data.frame(x=c(dataMatrix[,firstPlot:lastPlot]),Experiment=rep(colnames(dataMatrix)[firstPlot:lastPlot],each=nrow(dataMatrix))) | |
| 182 p <- ggplot(data=dataToPlot, aes(x = x, color=Experiment)) + stat_density(geom="line", size=1, position="identity") + | |
| 183 ggtitle("Intensity densities") + theme_bw() + ylab(label="Density") + | |
| 184 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5)) | |
| 185 if(dataAreFromCel){ | |
| 186 #original ploting function | |
| 187 #hist(celData[,firstPlot:lastPlot],lty=rep(1,nbConditions)[firstPlot:lastPlot],lwd=2,which='pm',target="probeset",transfo=log2,col=rainbow(nbConditions)[firstPlot:lastPlot]) | |
| 188 p <- p + xlab(label="Log2 intensities") | |
| 189 }else{ | |
| 190 p <- p + xlab(label="Intensities") | |
| 191 } | |
| 192 if(opt$format=="pdf"){ | |
| 193 pdf(paste(c("./plotDir/",opt$histo,iToPlot,".pdf"),collapse=""))}else{ | |
| 194 png(paste(c("./plotDir/",opt$histo,iToPlot,".png"),collapse="")) | |
| 195 } | |
| 196 print(p) | |
| 197 dev.off() | |
| 198 #save plotly files | |
| 199 pp <- ggplotly(p) | |
| 200 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$histo,iToPlot,".html"),collapse=""),selfcontained = F) | |
| 201 } | |
| 202 remove(p,dataToPlot) | |
| 203 addComment("[INFO]Histograms drawn",T,opt$log,display=FALSE) | |
| 204 } | |
| 205 | |
| 206 ##---------------------- | |
| 207 | |
| 208 ###plot MAplots### | |
| 209 MAplotPerPage=4 | |
| 210 if (!is.null(opt$maPlot)) { | |
| 211 iToPlot=1 | |
| 212 plotVector=list() | |
| 213 toTake=sample(nrow(dataMatrix),min(200000,nrow(dataMatrix))) | |
| 214 refMedianColumn=rowMedians(as.matrix(dataMatrix[toTake,])) | |
| 215 if(length(toTake)>100000)addComment(c("[INFO]high number of input data rows ",length(toTake),"; the generation of MA plot can take a while, please be patient"),TRUE,opt$log,display=FALSE) | |
| 216 for (iCondition in 1:nbConditions){ | |
| 217 #MAplot(celData,which=i,what=pm,transfo=log2) | |
| 218 #smoothScatter(x=xToPlot,y=yToPlot,main=nameConditions[iCondition]) | |
| 219 dataA=dataMatrix[toTake,iCondition] | |
| 220 dataB=refMedianColumn####ATTENTION PAR DEFAUT | |
| 221 xToPlot=0.5*(dataA+dataB) | |
| 222 yToPlot=dataA-dataB | |
| 223 tempX=seq(min(xToPlot),max(xToPlot),0.1) | |
| 224 tempY=unlist(lapply(tempX,function(x){median(yToPlot[intersect(which(xToPlot>=(x-0.1/2)),which(xToPlot<(x+0.1/2)))])})) | |
| 225 | |
| 226 dataToPlot=data.frame(x=xToPlot,y=yToPlot) | |
| 227 dataMedianToPlot=data.frame(x=tempX,y=tempY) | |
| 228 p <- ggplot(data=dataToPlot, aes(x,y)) + stat_density2d(aes(fill = ..density..^0.25), geom = "tile", contour = FALSE, n = 100) + | |
| 229 scale_fill_continuous(low = "white", high = "dodgerblue4") + geom_smooth(data=dataMedianToPlot,colour="red", size=0.5, se=FALSE) + | |
| 230 ggtitle(correspondanceNameTable[iCondition,2]) + theme_bw() + xlab(label="") + ylab(label="") + | |
| 231 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position = "none") | |
| 232 plotVector[[length(plotVector)+1]]=p | |
| 233 | |
| 234 #save plotly files | |
| 235 pp <- ggplotly(p) | |
| 236 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$maPlot,"_",correspondanceNameTable[iCondition,1],".html"),collapse=""),selfcontained = F) | |
| 237 | |
| 238 if(iCondition==nbConditions || length(plotVector)==MAplotPerPage){ | |
| 239 #define a new plotting file | |
| 240 if(opt$format=="pdf"){ | |
| 241 pdf(paste(c("./plotDir/",opt$maPlot,iToPlot,".pdf"),collapse=""))}else{ | |
| 242 png(paste(c("./plotDir/",opt$maPlot,iToPlot,".png"),collapse="")) | |
| 243 } | |
| 244 multiplot(plotlist=plotVector,cols=2) | |
| 245 dev.off() | |
| 246 if(iCondition<nbConditions){ | |
| 247 #prepare for a new plotting file if necessary | |
| 248 plotVector=list() | |
| 249 iToPlot=iToPlot+1 | |
| 250 } | |
| 251 } | |
| 252 } | |
| 253 remove(p,dataToPlot,dataA,dataB,toTake,xToPlot,yToPlot) | |
| 254 addComment("[INFO]MAplots drawn",T,opt$log,display=FALSE) | |
| 255 } | |
| 256 | |
| 257 ##---------------------- | |
| 258 | |
| 259 ###plot boxplots### | |
| 260 boxplotPerFigure=50 | |
| 261 if (!is.null(opt$boxplot)) { | |
| 262 for(iToPlot in 1:(((nbConditions-1)%/%boxplotPerFigure)+1)){ | |
| 263 firstPlot=1+boxplotPerFigure*(iToPlot-1) | |
| 264 lastPlot=min(nbConditions,boxplotPerFigure*iToPlot) | |
| 265 dataToPlot=data.frame(intensities=c(dataMatrix[,firstPlot:lastPlot]),Experiment=rep(colnames(dataMatrix)[firstPlot:lastPlot],each=nrow(dataMatrix))) | |
| 266 #to make HTML file lighter, sampling will be done amongst outliers | |
| 267 #get outliers for each boxplot | |
| 268 boxplotsOutliers=apply(dataMatrix[,firstPlot:lastPlot],2,function(x)boxplot.stats(x)$out) | |
| 269 #sample amongst them to keep at maximum of 1000 points and include both min and max outliers values | |
| 270 boxplotsOutliers=lapply(boxplotsOutliers,function(x)if(length(x)>0)c(sample(c(x),min(length(x),1000)),max(c(x)),min(c(x)))) | |
| 271 dataOutliers=data.frame(yVal=unlist(boxplotsOutliers),xVal=unlist(lapply(seq_along(boxplotsOutliers),function(x)rep(names(boxplotsOutliers)[x],length(boxplotsOutliers[[x]]))))) | |
| 272 #plot boxplots without outliers | |
| 273 p <- ggplot(data=dataToPlot, aes(y = intensities, x=Experiment ,color=Experiment)) + geom_boxplot(outlier.colour=NA,outlier.shape =NA) + | |
| 274 ggtitle("Intensities") + theme_bw() + xlab(label="") + | |
| 275 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 45, hjust = 1),plot.margin=unit(c(10,10,max(unlist(lapply(dataToPlot$Experiment,function(x)nchar(as.character(x))))),15+max(unlist(lapply(dataToPlot$Experiment,function(x)nchar(as.character(x)))))),"mm")) | |
| 276 #add to plot sampled outliers | |
| 277 p <- p + geom_point(data=dataOutliers,aes(x=xVal,y=yVal,color=xVal),inherit.aes = F) | |
| 278 if(dataAreFromCel){ | |
| 279 #original plotting function | |
| 280 #boxplot(celData[,firstPlot:lastPlot],which='pm',col=rainbow(nbConditions)[firstPlot:lastPlot],target="probeset",transfo=log2,names=nameConditions[firstPlot:lastPlot],main="Intensities") | |
| 281 p <- p + ylab(label="Log2 intensities") | |
| 282 }else{ | |
| 283 p <- p + ylab(label="Intensities") | |
| 284 } | |
| 285 if(opt$format=="pdf"){ | |
| 286 pdf(paste(c("./plotDir/",opt$boxplot,iToPlot,".pdf"),collapse=""))}else{ | |
| 287 png(paste(c("./plotDir/",opt$boxplot,iToPlot,".png"),collapse="")) | |
| 288 } | |
| 289 print(p) | |
| 290 dev.off() | |
| 291 #save plotly files | |
| 292 pp <- ggplotly(p) | |
| 293 | |
| 294 #modify plotly object to get HTML file not too heavy for loading | |
| 295 for(iData in 1:length(pp$x$data)){ | |
| 296 ##get kept outliers y values | |
| 297 #yPointsToKeep=dataOutliers$yVal[which(dataOutliers$xVal==pp$x$data[[iData]]$name)] | |
| 298 if(pp$x$data[[iData]]$type=="scatter"){ | |
| 299 ##scatter plot represent outliers points added to boxplot through geom_point | |
| 300 ##nothing to do as outliers have been sampled allready, just have to modify hover text | |
| 301 #if(length(yPointsToKeep)>0){ | |
| 302 #pointsToKeep=which(pp$x$data[[iData]]$y %in% yPointsToKeep) | |
| 303 #pp$x$data[[iData]]$x=pp$x$data[[iData]]$x[pointsToKeep] | |
| 304 #pp$x$data[[iData]]$y=pp$x$data[[iData]]$y[pointsToKeep] | |
| 305 #pp$x$data[[iData]]$text=pp$x$data[[iData]]$text[pointsToKeep] | |
| 306 #}else{ | |
| 307 #pp$x$data[[iData]]$x=NULL | |
| 308 #pp$x$data[[iData]]$y=NULL | |
| 309 #pp$x$data[[iData]]$marker$opacity=0 | |
| 310 #pp$x$data[[iData]]$hoverinfo=NULL | |
| 311 #pp$x$data[[iData]]$text=NULL | |
| 312 #} | |
| 313 #modify text to display | |
| 314 if(dataAreFromCel){ | |
| 315 pp$x$data[[iData]]$text=unlist(lapply(seq_along(pp$x$data[[iData]]$y),function(x)return(paste(c("log2(intensity) ",prettyNum(pp$x$data[[iData]]$y[x],digits=4)),collapse = "")))) | |
| 316 }else{ | |
| 317 pp$x$data[[iData]]$text=unlist(lapply(seq_along(pp$x$data[[iData]]$y),function(x)return(paste(c("intensity ",prettyNum(pp$x$data[[iData]]$y[x],digits=4)),collapse = "")))) | |
| 318 } | |
| 319 }else{ | |
| 320 ##disable marker plotting to keep only box and whiskers plot (outliers are displayed through scatter plot) | |
| 321 pp$x$data[[iData]]$marker$opacity=0 | |
| 322 | |
| 323 #sample 50000 points amongst all data to get a lighter html file, sampling size should not be too low to avoid modifying limit of boxplots | |
| 324 pp$x$data[[iData]]$y=c(sample(dataMatrix[,pp$x$data[[iData]]$name],min(length(dataMatrix[,pp$x$data[[iData]]$name]),50000)),min(dataMatrix[,pp$x$data[[iData]]$name]),max(dataMatrix[,pp$x$data[[iData]]$name])) | |
| 325 pp$x$data[[iData]]$x=rep(pp$x$data[[iData]]$x[1],length(pp$x$data[[iData]]$y)) | |
| 326 | |
| 327 ##first remove outliers info | |
| 328 #downUpValues=boxplot.stats(dataMatrix[,pp$x$data[[iData]]$name])$stats | |
| 329 #if(verbose)addComment(c("filter values for boxplot",pp$x$data[[iData]]$name,"between",min(downUpValues),"and",max(downUpValues)),T,opt$log) | |
| 330 #pointsToRemove=which(pp$x$data[[iData]]$y<min(downUpValues)) | |
| 331 #if(length(pointsToRemove)>0)pp$x$data[[iData]]$y=pp$x$data[[iData]]$y[-pointsToRemove] | |
| 332 #pointsToRemove=which(pp$x$data[[iData]]$y>max(downUpValues)) | |
| 333 #if(length(pointsToRemove)>0)pp$x$data[[iData]]$y=pp$x$data[[iData]]$y[-pointsToRemove] | |
| 334 #then add sampled outliers info | |
| 335 #pp$x$data[[iData]]$y=c(yPointsToKeep,pp$x$data[[iData]]$y) | |
| 336 #pp$x$data[[iData]]$x=rep(pp$x$data[[iData]]$x[1],length(pp$x$data[[iData]]$y)) | |
| 337 } | |
| 338 } | |
| 339 | |
| 340 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$boxplot,iToPlot,".html"),collapse=""),selfcontained = F) | |
| 341 } | |
| 342 remove(p,dataToPlot) | |
| 343 addComment("[INFO]Boxplots drawn",T,opt$log,display=FALSE) | |
| 344 | |
| 345 } | |
| 346 | |
| 347 ##---------------------- | |
| 348 | |
| 349 ###plot microarrays (only for .CEL files)### | |
| 350 if (!is.null(opt$microarray) && dataAreFromCel) { | |
| 351 for (iCondition in 1:nbConditions){ | |
| 352 if(opt$format=="pdf"){ | |
| 353 pdf(paste(c("./plotDir/",opt$microarray,"_",correspondanceNameTable[iCondition,1],".pdf"),collapse=""),onefile = F,width = 5,height = 5)}else{ | |
| 354 png(paste(c("./plotDir/",opt$microarray,"_",correspondanceNameTable[iCondition,1],".png"),collapse="")) | |
| 355 } | |
| 356 image(celData[,iCondition],main=correspondanceNameTable[iCondition,2]) | |
| 357 dev.off() | |
| 358 } | |
| 359 addComment("[INFO]Microarray drawn",T,opt$log,display=FALSE) | |
| 360 } | |
| 361 | |
| 362 ##---------------------- | |
| 363 | |
| 364 ###plot PCA plot### | |
| 365 if (!is.null(opt$acp)){ | |
| 366 ##to avoid error when nrow is too large, results quite stable with 200k random selected rows | |
| 367 randomSelection=sample(nrow(dataMatrix),min(200000,nrow(dataMatrix))) | |
| 368 #remove constant variables | |
| 369 | |
| 370 dataFiltered=dataMatrix[randomSelection,] | |
| 371 toRemove=which(unlist(apply(dataFiltered,1,var))==0) | |
| 372 if(length(toRemove)>0){ | |
| 373 dataFiltered=dataFiltered[-toRemove,] | |
| 374 } | |
| 375 ##geom_text(aes(label=Experiments,hjust=1, vjust=1.3), y = PC2+0.01) | |
| 376 PACres = prcomp(t(dataFiltered),scale.=TRUE) | |
| 377 | |
| 378 if(!is.null(opt$screePlot)){ | |
| 379 #scree plot | |
| 380 #p <- fviz_eig(PACres) | |
| 381 dataToPlot=data.frame(compo=seq(1,length(PACres$sdev)),var=(PACres$sdev^2/sum(PACres$sdev^2))*100) | |
| 382 p<-ggplot(data=dataToPlot, aes(x=compo, y=var)) + geom_bar(stat="identity", fill="steelblue") + geom_line() + geom_point() + | |
| 383 ggtitle("Scree plot") + theme_bw() + theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5)) + | |
| 384 xlab(label="Dimensions") + ylab(label="% explained variances") + scale_x_discrete(limits=dataToPlot$compo) | |
| 385 pp <- ggplotly(p) | |
| 386 | |
| 387 if(opt$format=="pdf"){ | |
| 388 pdf(paste(c("./plotDir/",opt$screePlot,".pdf"),collapse=""))}else{ | |
| 389 png(paste(c("./plotDir/",opt$screePlot,".png"),collapse="")) | |
| 390 } | |
| 391 plot(p) | |
| 392 dev.off() | |
| 393 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$screePlot,".html"),collapse=""),selfcontained = F) | |
| 394 } | |
| 395 | |
| 396 #now plot pca plots | |
| 397 | |
| 398 if(!is.null(opt$factorInfo)){ | |
| 399 fileIdent="" | |
| 400 symbolset = c("circle","cross","square","diamond","circle-open","square-open","diamond-open","x") | |
| 401 | |
| 402 #save equivalence between real factor names and generic ones in correspondanceNameTable | |
| 403 correspondanceNameTable=rbind(correspondanceNameTable,matrix(c(paste("Factor",1:(ncol(factorInfoMatrix)-1),sep=""),colnames(factorInfoMatrix)[-1]),ncol=2,nrow=ncol(factorInfoMatrix)-1)) | |
| 404 rownames(correspondanceNameTable)=correspondanceNameTable[,2] | |
| 405 | |
| 406 #first order factors from decreasing groups number | |
| 407 orderedFactors=colnames(factorInfoMatrix)[-1][order(unlist(lapply(colnames(factorInfoMatrix)[-1],function(x)length(table(factorInfoMatrix[,x])))),decreasing = T)] | |
| 408 allFactorsBigger=length(table(factorInfoMatrix[,orderedFactors[length(orderedFactors)]]))>length(symbolset) | |
| 409 if(allFactorsBigger)addComment("[WARNING]All factors are composed of too many groups to display two factors at same time, each PCA plot will display only one factor groups",T,opt$log,display=FALSE) | |
| 410 for(iFactor in 1:length(orderedFactors)){ | |
| 411 #if it is the last factor of the list or if all factor | |
| 412 if(iFactor==length(orderedFactors) || allFactorsBigger){ | |
| 413 if(length(orderedFactors)==1 || allFactorsBigger){ | |
| 414 dataToPlot=data.frame(PC1=PACres$x[,1],PC2=PACres$x[,2],PC3=PACres$x[,3],Experiments=rownames(PACres$x), Attribute1=factorInfoMatrix[rownames(PACres$x),orderedFactors[iFactor]], hoverLabel=unlist(lapply(rownames(PACres$x),function(x)paste(factorInfoMatrix[x,-1],collapse=",")))) | |
| 415 p <- plot_ly(dataToPlot,x = ~PC1, y = ~PC2, z = ~PC3, type = 'scatter3d', mode="markers", color=~Attribute1,colors=rainbow(length(levels(dataToPlot$Attribute1))+2),hoverinfo = 'text', text = ~paste(Experiments,"\n",hoverLabel),marker=list(size=5))%>% | |
| 416 layout(title = "Principal Component Analysis", scene = list(xaxis = list(title = "Component 1"),yaxis = list(title = "Component 2"),zaxis = list(title = "Component 3")), | |
| 417 legend=list(font = list(family = "sans-serif",size = 15,color = "#000"))) | |
| 418 fileIdent=correspondanceNameTable[orderedFactors[iFactor],1] | |
| 419 #add text label to plot | |
| 420 ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = 'text', inherit = F, text=rownames(PACres$x), hoverinfo='skip', showlegend = FALSE, color=I('black')) | |
| 421 #save the plotly plot | |
| 422 htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_",fileIdent,".html"),collapse=""),selfcontained = F) | |
| 423 } | |
| 424 }else{ | |
| 425 for(iFactorBis in (iFactor+1):length(orderedFactors)){ | |
| 426 if(length(table(factorInfoMatrix[,orderedFactors[iFactorBis]]))<=length(symbolset)){ | |
| 427 dataToPlot=data.frame(PC1=PACres$x[,1],PC2=PACres$x[,2],PC3=PACres$x[,3],Experiments=rownames(PACres$x), Attribute1=factorInfoMatrix[rownames(PACres$x),orderedFactors[iFactor]], Attribute2=factorInfoMatrix[rownames(PACres$x),orderedFactors[iFactorBis]], hoverLabel=unlist(lapply(rownames(PACres$x),function(x)paste(factorInfoMatrix[x,-1],collapse=",")))) | |
| 428 p <- plot_ly(dataToPlot,x = ~PC1, y = ~PC2, z = ~PC3, type = 'scatter3d', mode="markers", color=~Attribute1,colors=rainbow(length(levels(dataToPlot$Attribute1))+2),symbol=~Attribute2,symbols = symbolset,hoverinfo = 'text', text = ~paste(Experiments,"\n",hoverLabel),marker=list(size=5))%>% | |
| 429 layout(title = "Principal Component Analysis", scene = list(xaxis = list(title = "Component 1"),yaxis = list(title = "Component 2"),zaxis = list(title = "Component 3")), | |
| 430 legend=list(font = list(family = "sans-serif",size = 15,color = "#000"))) | |
| 431 fileIdent=paste(correspondanceNameTable[orderedFactors[c(iFactor,iFactorBis)],1],collapse="_AND_") | |
| 432 #add text label to plot | |
| 433 ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = 'text', inherit = F, text=rownames(PACres$x), hoverinfo='skip', showlegend = FALSE, color=I('black')) | |
| 434 #save the plotly plot | |
| 435 htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_",fileIdent,".html"),collapse=""),selfcontained = F) | |
| 436 }else{ | |
| 437 addComment(c("[WARNING]PCA with",orderedFactors[iFactor],"and",orderedFactors[iFactorBis],"groups cannot be displayed, too many groups (max",length(symbolset),")"),T,opt$log,display=FALSE) | |
| 438 } | |
| 439 } | |
| 440 } | |
| 441 } | |
| 442 }else{ | |
| 443 dataToPlot=data.frame(PC1=PACres$x[,1],PC2=PACres$x[,2],PC3=PACres$x[,3],Experiments=rownames(PACres$x)) | |
| 444 p <- plot_ly(dataToPlot,x = ~PC1, y = ~PC2, z = ~PC3, type = 'scatter3d', mode="markers",marker=list(size=5,color="salmon"),hoverinfo = 'text',text = ~paste(Experiments))%>% | |
| 445 layout(title = "Principal Component Analysis", scene = list(xaxis = list(title = "Component 1"),yaxis = list(title = "Component 2"),zaxis = list(title = "Component 3")), | |
| 446 legend=list(font = list(family = "sans-serif",size = 15,color = "#000"))) | |
| 447 ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = 'text', inherit = F, text=rownames(PACres$x), hoverinfo='skip', showlegend = FALSE, color=I('black')) | |
| 448 | |
| 449 #save plotly files | |
| 450 htmlwidgets::saveWidget(as_widget(p), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$acp,"_plot.html"),collapse=""),selfcontained = F) | |
| 451 } | |
| 452 remove(p,dataToPlot,dataFiltered) | |
| 453 addComment("[INFO]ACP plot drawn",T,opt$log,display=FALSE) | |
| 454 } | |
| 455 | |
| 456 #write correspondances between plot file names and displayed names in figure legends, usefull to define html items in xml file | |
| 457 write.table(correspondanceNameTable,file=file.path(getwd(), "correspondanceFileNames.csv"),quote=FALSE,sep="\t",col.names = F,row.names = F) | |
| 458 | |
| 459 end.time <- Sys.time() | |
| 460 addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE) | |
| 461 | |
| 462 addComment("[INFO]End of R script",T,opt$log,display=FALSE) | |
| 463 | |
| 464 printSessionInfo(opt$log) | |
| 465 #sessionInfo() | 
