Mercurial > repos > vandelj > giant_hierarchical_clustering
comparison src/LIMMAscriptV4.R @ 0:14045c80a222 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author | vandelj |
---|---|
date | Fri, 26 Jun 2020 09:38:23 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:14045c80a222 |
---|---|
1 # A command-line interface for LIMMA to use with Galaxy | |
2 # written by Jimmy Vandel | |
3 # one of these arguments is required: | |
4 # | |
5 # | |
6 initial.options <- commandArgs(trailingOnly = FALSE) | |
7 file.arg.name <- "--file=" | |
8 script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)]) | |
9 script.basename <- dirname(script.name) | |
10 source(file.path(script.basename, "utils.R")) | |
11 source(file.path(script.basename, "getopt.R")) | |
12 | |
13 #addComment("Welcome R!") | |
14 | |
15 # setup R error handling to go to stderr | |
16 options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
17 | |
18 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
19 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
20 loc <- Sys.setlocale("LC_NUMERIC", "C") | |
21 | |
22 #get starting time | |
23 start.time <- Sys.time() | |
24 | |
25 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) | |
26 args <- commandArgs() | |
27 | |
28 # get options, using the spec as defined by the enclosed list. | |
29 # we read the options from the default: commandArgs(TRUE). | |
30 spec <- matrix(c( | |
31 "dataFile", "i", 1, "character", | |
32 "factorInfo","a", 1, "character", | |
33 "blockingInfo","b", 1, "character", | |
34 "dicoRenaming","g",1,"character", | |
35 "blockingPolicy","u", 1, "character", | |
36 "fdrThreshold","t", 1, "double", | |
37 "thresholdFC","d", 1, "double", | |
38 "format", "f", 1, "character", | |
39 "histo","h", 1, "character", | |
40 "volcano","v", 1, "character", | |
41 "factorsContrast","r", 1, "character", | |
42 "contrastNames","p", 1, "character", | |
43 "firstGroupContrast","m", 1, "character", | |
44 "secondGroupContrast","n", 1, "character", | |
45 "controlGroups","c", 1, "character", | |
46 "fratioFile","s",1,"character", | |
47 "organismID","x",1,"character", | |
48 "rowNameType","y",1,"character", | |
49 "quiet", "q", 0, "logical", | |
50 "log", "l", 1, "character", | |
51 "outputFile" , "o", 1, "character", | |
52 "outputDfFile" , "z", 1, "character"), | |
53 byrow=TRUE, ncol=4) | |
54 opt <- getopt(spec) | |
55 | |
56 # enforce the following required arguments | |
57 if (is.null(opt$log)) { | |
58 addComment("[ERROR]'log file' is required\n") | |
59 q( "no", 1, F ) | |
60 } | |
61 addComment("[INFO]Start of R script",T,opt$log,display=FALSE) | |
62 if (is.null(opt$dataFile)) { | |
63 addComment("[ERROR]'dataFile' is required",T,opt$log) | |
64 q( "no", 1, F ) | |
65 } | |
66 if (!is.null(opt$blockingInfo) && is.null(opt$blockingPolicy) ) { | |
67 addComment("[ERROR]blocking policy is missing",T,opt$log) | |
68 q( "no", 1, F ) | |
69 } | |
70 if (is.null(opt$dicoRenaming)) { | |
71 addComment("[ERROR]renaming dictionnary is missing",T,opt$log) | |
72 q( "no", 1, F ) | |
73 } | |
74 if (is.null(opt$factorsContrast)) { | |
75 addComment("[ERROR]factor informations are missing",T,opt$log) | |
76 q( "no", 1, F ) | |
77 } | |
78 if (length(opt$firstGroupContrast)!=length(opt$secondGroupContrast)) { | |
79 addComment("[ERROR]some contrast groups seems to be empty",T,opt$log) | |
80 q( "no", 1, F ) | |
81 } | |
82 if (is.null(opt$factorInfo)) { | |
83 addComment("[ERROR]factors info is missing",T,opt$log) | |
84 q( "no", 1, F ) | |
85 } | |
86 if (is.null(opt$format)) { | |
87 addComment("[ERROR]'output format' is required",T,opt$log) | |
88 q( "no", 1, F ) | |
89 } | |
90 if (is.null(opt$fdrThreshold)) { | |
91 addComment("[ERROR]'FDR threshold' is required",T,opt$log) | |
92 q( "no", 1, F ) | |
93 } | |
94 if (is.null(opt$outputFile) || is.null(opt$outputDfFile)){ | |
95 addComment("[ERROR]'output files' are required",T,opt$log) | |
96 q( "no", 1, F ) | |
97 } | |
98 if (is.null(opt$thresholdFC)){ | |
99 addComment("[ERROR]'FC threshold' is required",T,opt$log) | |
100 q( "no", 1, F ) | |
101 } | |
102 if (is.null(opt$fratioFile)) { | |
103 addComment("[ERROR]F-ratio parameter is missing",T,opt$log) | |
104 q( "no", 1, F ) | |
105 } | |
106 | |
107 #demande si le script sera bavard | |
108 verbose <- if (is.null(opt$quiet)) { | |
109 TRUE | |
110 }else{ | |
111 FALSE | |
112 } | |
113 | |
114 #paramètres internes | |
115 #pour savoir si on remplace les FC calculés par LIMMA par un calcul du LS-MEAN (ie moyenne de moyennes de chaque groupe dans chaque terme du contraste plutôt qu'une moyenne globale dans chaque terme) | |
116 useLSmean=FALSE | |
117 | |
118 addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE) | |
119 | |
120 addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE) | |
121 addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE) | |
122 | |
123 #directory for plots | |
124 dir.create(file.path(getwd(), "plotDir")) | |
125 dir.create(file.path(getwd(), "plotLyDir")) | |
126 | |
127 #charge des packages silencieusement | |
128 suppressPackageStartupMessages({ | |
129 library("methods") | |
130 library("limma") | |
131 library("biomaRt") | |
132 library("ggplot2") | |
133 library("plotly") | |
134 library("stringr") | |
135 library("RColorBrewer") | |
136 }) | |
137 | |
138 | |
139 #chargement du fichier dictionnaire de renommage | |
140 renamingDico=read.csv(file=file.path(getwd(), opt$dicoRenaming),header=F,sep="\t",colClasses="character") | |
141 rownames(renamingDico)=renamingDico[,2] | |
142 | |
143 | |
144 #chargement des fichiers en entrée | |
145 expDataMatrix=read.csv(file=file.path(getwd(), opt$dataFile),header=F,sep="\t",colClasses="character") | |
146 #remove first row to convert it as colnames (to avoid X before colnames with header=T) | |
147 colNamesData=expDataMatrix[1,-1] | |
148 expDataMatrix=expDataMatrix[-1,] | |
149 #remove first colum to convert it as rownames | |
150 rowNamesData=expDataMatrix[,1] | |
151 expDataMatrix=expDataMatrix[,-1] | |
152 if(is.data.frame(expDataMatrix)){ | |
153 expDataMatrix=data.matrix(expDataMatrix) | |
154 }else{ | |
155 expDataMatrix=data.matrix(as.numeric(expDataMatrix)) | |
156 } | |
157 dimnames(expDataMatrix)=list(rowNamesData,colNamesData) | |
158 | |
159 #test the number of rows that are constant in dataMatrix | |
160 nbConstantRows=length(which(unlist(apply(expDataMatrix,1,var))==0)) | |
161 if(nbConstantRows>0){ | |
162 addComment(c("[WARNING]",nbConstantRows,"rows are constant across conditions in input data file"),T,opt$log,display=FALSE) | |
163 } | |
164 | |
165 #test if all condition names are present in dico | |
166 if(!all(colnames(expDataMatrix) %in% rownames(renamingDico))){ | |
167 addComment("[ERROR]Missing condition names in renaming dictionary",T,opt$log) | |
168 q( "no", 1, F ) | |
169 } | |
170 | |
171 addComment("[INFO]Expression data loaded and checked",T,opt$log,display=FALSE) | |
172 addComment(c("[INFO]Dim of expression matrix:",dim(expDataMatrix)),T,opt$log,display=FALSE) | |
173 | |
174 #chargement du fichier des facteurs | |
175 factorInfoMatrix=read.csv(file=file.path(getwd(), opt$factorInfo),header=F,sep="\t",colClasses="character") | |
176 #remove first row to convert it as colnames | |
177 colnames(factorInfoMatrix)=factorInfoMatrix[1,] | |
178 factorInfoMatrix=factorInfoMatrix[-1,] | |
179 #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case | |
180 rownames(factorInfoMatrix)=factorInfoMatrix[,1] | |
181 | |
182 if(length(setdiff(colnames(expDataMatrix),rownames(factorInfoMatrix)))!=0){ | |
183 addComment("[ERROR]Missing samples in factor file",T,opt$log) | |
184 q( "no", 1, F ) | |
185 } | |
186 | |
187 #order sample as in expression matrix and remove spurious sample | |
188 factorInfoMatrix=factorInfoMatrix[colnames(expDataMatrix),] | |
189 | |
190 #test if all values names are present in dico | |
191 if(!all(unlist(factorInfoMatrix) %in% rownames(renamingDico))){ | |
192 addComment("[ERROR]Missing factor names in renaming dictionary",T,opt$log) | |
193 q( "no", 1, F ) | |
194 } | |
195 | |
196 addComment("[INFO]Factors OK",T,opt$log,display=FALSE) | |
197 addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),T,opt$log,display=FALSE) | |
198 | |
199 ##manage blocking factor | |
200 blockingFactor=NULL | |
201 blockinFactorsList=NULL | |
202 if(!is.null(opt$blockingInfo)){ | |
203 | |
204 #chargement du fichier des blocking factors | |
205 blockingInfoMatrix=read.csv(file=file.path(getwd(), opt$blockingInfo),header=F,sep="\t",colClasses="character") | |
206 #remove first row to convert it as colnames | |
207 colnames(blockingInfoMatrix)=blockingInfoMatrix[1,] | |
208 blockingInfoMatrix=blockingInfoMatrix[-1,] | |
209 #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case | |
210 rownames(blockingInfoMatrix)=blockingInfoMatrix[,1] | |
211 | |
212 | |
213 if(length(setdiff(colnames(expDataMatrix),rownames(blockingInfoMatrix)))!=0){ | |
214 addComment("[ERROR]Missing samples in blocking factor file",T,opt$log) | |
215 q( "no", 1, F ) | |
216 } | |
217 | |
218 #order sample as in expression matrix | |
219 blockingInfoMatrix=blockingInfoMatrix[colnames(expDataMatrix),] | |
220 | |
221 #test if all blocking names are present in dico | |
222 if(!all(unlist(blockingInfoMatrix) %in% rownames(renamingDico))){ | |
223 addComment("[ERROR]Missing blocking names in renaming dictionary",T,opt$log) | |
224 q( "no", 1, F ) | |
225 } | |
226 | |
227 #remove blocking factors allready present as real factors | |
228 blockingNotInMainFactors=setdiff(colnames(blockingInfoMatrix)[-1],colnames(factorInfoMatrix)[-1]) | |
229 | |
230 if(length(blockingNotInMainFactors)<(ncol(blockingInfoMatrix)-1))addComment("[WARNING]Blocking factors cannot be principal factors",T,opt$log,display=FALSE) | |
231 | |
232 if(length(blockingNotInMainFactors)>0){ | |
233 | |
234 blockingInfoMatrix=blockingInfoMatrix[,c(colnames(blockingInfoMatrix)[1],blockingNotInMainFactors)] | |
235 | |
236 groupBlocking=rep("c",ncol(expDataMatrix)) | |
237 #for each blocking factor | |
238 for(blockingFact in blockingNotInMainFactors){ | |
239 if(opt$blockingPolicy=="correlated"){ | |
240 indNewFact=as.numeric(factor(blockingInfoMatrix[,blockingFact])) | |
241 groupBlocking=paste(groupBlocking,indNewFact,sep="_") | |
242 }else{ | |
243 if(is.null(blockinFactorsList))blockinFactorsList=list() | |
244 blockinFactorsList[[blockingFact]]=factor(unlist(lapply(blockingInfoMatrix[,blockingFact],function(x)paste(c(blockingFact,"_",x),collapse="")))) | |
245 } | |
246 } | |
247 if(opt$blockingPolicy=="correlated"){ | |
248 blockingFactor=factor(groupBlocking) | |
249 if(length(levels(blockingFactor))==1){ | |
250 addComment("[ERROR]Selected blocking factors seems to be constant",T,opt$log) | |
251 q( "no", 1, F ) | |
252 } | |
253 } | |
254 addComment("[INFO]Blocking info OK",T,opt$log,display=FALSE) | |
255 }else{ | |
256 addComment("[WARNING]No blocking factors will be considered",T,opt$log,display=FALSE) | |
257 } | |
258 } | |
259 | |
260 | |
261 ##rename different input parameters using renamingDictionary | |
262 opt$factorsContrast=renamingDico[unlist(lapply(unlist(strsplit(opt$factorsContrast,",")),function(x)which(renamingDico[,1]==x))),2] | |
263 | |
264 userDefinedContrasts=FALSE | |
265 if(!is.null(opt$firstGroupContrast) && !is.null(opt$secondGroupContrast)){ | |
266 userDefinedContrasts=TRUE | |
267 for(iContrast in 1:length(opt$firstGroupContrast)){ | |
268 opt$firstGroupContrast[iContrast]=paste(unlist(lapply(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),function(x)paste(renamingDico[unlist(lapply(unlist(strsplit(x,"\\*")),function(x)which(renamingDico[,1]==x))),2],collapse="*"))),collapse=",") | |
269 opt$secondGroupContrast[iContrast]=paste(unlist(lapply(unlist(strsplit(opt$secondGroupContrast[iContrast],",")),function(x)paste(renamingDico[unlist(lapply(unlist(strsplit(x,"\\*")),function(x)which(renamingDico[,1]==x))),2],collapse="*"))),collapse=",") | |
270 } | |
271 } | |
272 | |
273 if(!is.null(opt$controlGroups)){ | |
274 renamedGroups=c() | |
275 for(iGroup in unlist(strsplit(opt$controlGroups,","))){ | |
276 renamedControlGroup=paste(renamingDico[unlist(lapply(unlist(strsplit(iGroup,":")),function(x)which(renamingDico[,1]==x))),2],collapse=":") | |
277 if(length(renamedControlGroup)==0 || any(which(unlist(gregexpr(text = renamedControlGroup,pattern = ":"))==-1))){ | |
278 addComment("[ERROR]Control groups for interaction seem to mismatch, please check them.",T,opt$log) | |
279 q( "no", 1, F ) | |
280 } | |
281 renamedGroups=c(renamedGroups,renamedControlGroup) | |
282 } | |
283 opt$controlGroups=renamedGroups | |
284 } | |
285 addComment("[INFO]Contrast variables are renamed to avoid confusion",T,opt$log,display=FALSE) | |
286 ##renaming done | |
287 | |
288 #to convert factor as numeric value --> useless now ? | |
289 #expDataMatrix=apply(expDataMatrix,c(1,2),function(x)as.numeric(paste(x))) | |
290 | |
291 #get factors info for LIMMA | |
292 factorsList=list() | |
293 for(iFactor in opt$factorsContrast){ | |
294 if(!(iFactor %in% colnames(factorInfoMatrix))){ | |
295 addComment("[ERROR]Required factors are missing in input file",T,opt$log) | |
296 q( "no", 1, F ) | |
297 } | |
298 factorsList[[iFactor]]=factor(unlist(lapply(factorInfoMatrix[,iFactor],function(x)paste(c(iFactor,"_",x),collapse="")))) | |
299 if(length(levels(factorsList[[iFactor]]))==1){ | |
300 addComment("[ERROR]One selected factor seems to be constant",T,opt$log) | |
301 q( "no", 1, F ) | |
302 } | |
303 } | |
304 | |
305 #check if there is at least 2 factors to allow interaction computation | |
306 if(!is.null(opt$controlGroups) && length(factorsList)<2){ | |
307 addComment("[ERROR]You cannot ask for interaction with less than 2 factors",T,opt$log) | |
308 q( "no", 1, F ) | |
309 } | |
310 | |
311 #merge all factors as a single one | |
312 factorsMerged=as.character(factorsList[[opt$factorsContrast[1]]]) | |
313 for(iFactor in opt$factorsContrast[-1]){ | |
314 factorsMerged=paste(factorsMerged,as.character(factorsList[[iFactor]]),sep=".") | |
315 } | |
316 factorsMerged=factor(factorsMerged) | |
317 | |
318 #checked that coefficient number (ie. factorsMerged levels) is strictly smaller than sample size | |
319 if(length(levels(factorsMerged))>=length(factorsMerged)){ | |
320 addComment(c("[ERROR]No enough samples (",length(factorsMerged),") to estimate ",length(levels(factorsMerged))," coefficients"),T,opt$log) | |
321 q( "no", 1, F ) | |
322 } | |
323 | |
324 #get the sample size of each factor values | |
325 sampleSizeFactor=table(factorsMerged) | |
326 | |
327 | |
328 if(!is.null(blockinFactorsList)){ | |
329 factorString=c("blockinFactorsList[['", names(blockinFactorsList)[1],"']]") | |
330 for(blockingFact in names(blockinFactorsList)[-1]){ | |
331 factorString=c(factorString," + blockinFactorsList[['",blockingFact,"']]") | |
332 } | |
333 design = model.matrix(as.formula(paste(c("~ factorsMerged +",factorString," + 0"),collapse=""))) | |
334 | |
335 #rename design columns | |
336 coeffMeaning = levels(factorsMerged) | |
337 for(blockingFact in blockinFactorsList){ | |
338 coeffMeaning=c(coeffMeaning,levels(blockingFact)[-1]) | |
339 } | |
340 colnames(design) = coeffMeaning | |
341 }else{ | |
342 design = model.matrix(as.formula( ~ factorsMerged + 0)) | |
343 | |
344 #rename degin columns | |
345 coeffMeaning = levels(factorsMerged) | |
346 colnames(design) = coeffMeaning | |
347 } | |
348 | |
349 addComment(c("[INFO]Available coefficients: ",coeffMeaning),T,opt$log,display=F) | |
350 | |
351 estimableCoeff=which(colSums(design)!=0) | |
352 | |
353 addComment("[INFO]Design done",T,opt$log,display=F) | |
354 | |
355 #use blocking factor if exists | |
356 if(!is.null(blockingFactor)){ | |
357 corfit <- duplicateCorrelation(expDataMatrix, design, block=blockingFactor) | |
358 | |
359 addComment(c("[INFO]Correlation within groups: ",corfit$consensus.correlation),T,opt$log,display=F) | |
360 | |
361 #run linear model fit | |
362 data.fit = lmFit(expDataMatrix,design,block = blockingFactor, correlation=corfit$consensus.correlation) | |
363 }else{ | |
364 #run linear model fit | |
365 data.fit = lmFit(expDataMatrix,design) | |
366 } | |
367 | |
368 estimatedCoeff=which(!is.na(data.fit$coefficients[1,])) | |
369 | |
370 addComment("[INFO]Lmfit done",T,opt$log,display=F) | |
371 | |
372 #catch situation where some coefficients cannot be estimated, probably due to dependances between design columns | |
373 #if(length(setdiff(estimableCoeff,estimatedCoeff))>0){ | |
374 # addComment("[ERROR]Error in design matrix, check your group definitions",T,opt$log) | |
375 # q( "no", 1, F ) | |
376 #} | |
377 #to strong condition, should return ERROR only when coefficients relative to principal factors cannot be estimated, otherwise, return a simple WARNING | |
378 | |
379 #define requested contrasts | |
380 requiredContrasts=c() | |
381 humanReadingContrasts=c() | |
382 persoContrastName=c() | |
383 if(userDefinedContrasts){ | |
384 for(iContrast in 1:length(opt$firstGroupContrast)){ | |
385 posGroup=unlist(lapply(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),function(x)paste(paste(opt$factorsContrast,unlist(strsplit(x,"\\*")),sep="_"),collapse="."))) | |
386 negGroup=unlist(lapply(unlist(strsplit(opt$secondGroupContrast[iContrast],",")),function(x)paste(paste(opt$factorsContrast,unlist(strsplit(x,"\\*")),sep="_"),collapse="."))) | |
387 #clear posGroup and negGroup from empty groups | |
388 emptyPosGroups=which(!(posGroup%in%coeffMeaning)) | |
389 if(length(emptyPosGroups)>0){ | |
390 addComment(c("[WARNING]The group(s)",posGroup[emptyPosGroups],"is/are removed from contrast as it/they is/are empty"),T,opt$log,display=FALSE) | |
391 posGroup=posGroup[-emptyPosGroups] | |
392 currentHumanContrast=paste(unlist(strsplit(opt$firstGroupContrast[iContrast],","))[-emptyPosGroups],collapse="+") | |
393 }else{ | |
394 currentHumanContrast=paste(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),collapse="+") | |
395 } | |
396 emptyNegGroups=which(!(negGroup%in%coeffMeaning)) | |
397 if(length(emptyNegGroups)>0){ | |
398 addComment(c("[WARNING]The group(s)",negGroup[emptyNegGroups],"is/are removed from contrast as it/they is/are empty"),T,opt$log,display=FALSE) | |
399 negGroup=negGroup[-emptyNegGroups] | |
400 currentHumanContrast=paste(c(currentHumanContrast,unlist(strsplit(opt$secondGroupContrast[iContrast],","))[-emptyNegGroups]),collapse="-") | |
401 }else{ | |
402 currentHumanContrast=paste(c(currentHumanContrast,unlist(strsplit(opt$secondGroupContrast[iContrast],","))),collapse="-") | |
403 } | |
404 if(length(posGroup)==0 || length(negGroup)==0 ){ | |
405 addComment(c("[WARNING]Contrast",currentHumanContrast,"cannot be estimated due to empty group"),T,opt$log,display=FALSE) | |
406 }else{ | |
407 if(all(posGroup%in%negGroup) && all(negGroup%in%posGroup)){ | |
408 addComment(c("[WARNING]Contrast",currentHumanContrast,"cannot be estimated due to null contrast"),T,opt$log,display=FALSE) | |
409 }else{ | |
410 #get coefficients required for first group added as positive | |
411 positiveCoeffWeights=sampleSizeFactor[posGroup]/sum(sampleSizeFactor[posGroup]) | |
412 #positiveCoeffWeights=rep(1,length(posGroup)) | |
413 #names(positiveCoeffWeights)=names(sampleSizeFactor[posGroup]) | |
414 #get coefficients required for second group added as negative | |
415 negativeCoeffWeights=sampleSizeFactor[negGroup]/sum(sampleSizeFactor[negGroup]) | |
416 #negativeCoeffWeights=rep(1,length(negGroup)) | |
417 #names(negativeCoeffWeights)=names(sampleSizeFactor[negGroup]) | |
418 #build the resulting contrast | |
419 currentContrast=paste(paste(positiveCoeffWeights[posGroup],posGroup,sep="*"),collapse="+") | |
420 currentContrast=paste(c(currentContrast,paste(paste(negativeCoeffWeights[negGroup],negGroup,sep="*"),collapse="-")),collapse="-") | |
421 requiredContrasts=c(requiredContrasts,currentContrast) | |
422 | |
423 #build the human reading contrast | |
424 humanReadingContrasts=c(humanReadingContrasts,currentHumanContrast) | |
425 if(!is.null(opt$contrastNames) && nchar(opt$contrastNames[iContrast])>0){ | |
426 persoContrastName=c(persoContrastName,opt$contrastNames[iContrast]) | |
427 }else{ | |
428 persoContrastName=c(persoContrastName,"") | |
429 } | |
430 | |
431 addComment(c("[INFO]Contrast added : ",currentHumanContrast),T,opt$log,display=F) | |
432 addComment(c("with complete formula ",currentContrast),T,opt$log,display=F) | |
433 } | |
434 } | |
435 } | |
436 } | |
437 | |
438 | |
439 #define the true formula with interactions to get interaction coefficients | |
440 factorString=c("factorsList[['", names(factorsList)[1],"']]") | |
441 for(iFactor in names(factorsList)[-1]){ | |
442 factorString=c(factorString," * factorsList[['",iFactor,"']]") | |
443 } | |
444 | |
445 if(!is.null(blockinFactorsList)){ | |
446 for(blockingFact in names(blockinFactorsList)){ | |
447 factorString=c(factorString," + blockinFactorsList[['",blockingFact,"']]") | |
448 } | |
449 } | |
450 | |
451 #should not be null at the end | |
452 allFtestMeanSquare=NULL | |
453 #to get the F-test values | |
454 estimatedInteractions=rownames(anova(lm(as.formula(paste(c("expDataMatrix[1,] ~ ",factorString),collapse=""))))) | |
455 estimatedInteractions=c(unlist(lapply(estimatedInteractions[-length(estimatedInteractions)],function(x){temp=unlist(strsplit(x,"[ \" | : ]"));paste(temp[seq(2,length(temp),3)],collapse=":")})),estimatedInteractions[length(estimatedInteractions)]) | |
456 #rename estimated interaction terms using renamingDico | |
457 estimatedInteractions=c(unlist(lapply(estimatedInteractions[-length(estimatedInteractions)],function(x)paste(renamingDico[unlist(strsplit(x,":")),1],collapse=":"))),estimatedInteractions[length(estimatedInteractions)]) | |
458 t <- unlist(apply(expDataMatrix,1,function(x){temp=anova(lm(as.formula(paste(c("x ~ ",factorString),collapse=""))))$`Mean Sq`;temp/temp[length(temp)]})) | |
459 allFtestMeanSquare <- t(matrix(t,nrow=length(estimatedInteractions))) | |
460 #remove from allFtest rows containing NA | |
461 if(length(which(is.na(allFtestMeanSquare[,1])))>0)allFtestMeanSquare=allFtestMeanSquare[-(which(is.na(allFtestMeanSquare[,1]))),] | |
462 colnames(allFtestMeanSquare)=estimatedInteractions | |
463 | |
464 #add contrasts corresponding to interaction terms | |
465 if(!is.null(opt$controlGroups)){ | |
466 #first load user defined control group for each factor | |
467 controlGroup=rep(NA,length(factorsList)) | |
468 names(controlGroup)=names(factorsList) | |
469 for(iGroup in opt$controlGroups){ | |
470 splitGroup=unlist(strsplit(iGroup,":")) | |
471 splitGroup[2]=paste(splitGroup[1],splitGroup[2],sep = "_") | |
472 #check if defined control group is really a level of the corresponding factor | |
473 if(!splitGroup[1]%in%names(controlGroup) || !splitGroup[2]%in%factorsList[[splitGroup[1]]]){ | |
474 addComment(c("[ERROR]The factor name",splitGroup[1],"does not exist or group name",splitGroup[2]),T,opt$log) | |
475 q( "no", 1, F ) | |
476 } | |
477 if(!is.na(controlGroup[splitGroup[1]])){ | |
478 addComment("[ERROR]Several control groups are defined for the same factor, please select only one control group for each factor if you want to compute interaction contrasts",T,opt$log) | |
479 q( "no", 1, F ) | |
480 } | |
481 controlGroup[splitGroup[1]]=splitGroup[2] | |
482 } | |
483 | |
484 #check if all factor have a defined control group | |
485 if(any(is.na(controlGroup))){ | |
486 addComment("[ERROR]Missing control group for some factors, please check them if you want to compute interaction contrasts",T,opt$log) | |
487 q( "no", 1, F ) | |
488 } | |
489 | |
490 nbFactors=length(factorsList) | |
491 interactionContrasts=c() | |
492 contrastClass=c() | |
493 #initialize list for the first level | |
494 newPreviousLoopContrast=list() | |
495 for(iFactorA in 1:(nbFactors-1)){ | |
496 nameFactorA=names(factorsList)[iFactorA] | |
497 compA=c() | |
498 for(levelA in setdiff(levels(factorsList[[iFactorA]]),controlGroup[nameFactorA])){ | |
499 compA=c(compA,paste(levelA,controlGroup[nameFactorA],sep="-")) | |
500 } | |
501 newPreviousLoopContrast[[as.character(iFactorA)]]=compA | |
502 } | |
503 #make a loop for growing interaction set | |
504 for(globalIfactor in 1:(nbFactors-1)){ | |
505 previousLoopContrast=newPreviousLoopContrast | |
506 newPreviousLoopContrast=list() | |
507 #factor A reuse contrasts made at previsous loop | |
508 for(iFactorA in names(previousLoopContrast)){ | |
509 compA=previousLoopContrast[[iFactorA]] | |
510 | |
511 if(max(as.integer(unlist(strsplit(iFactorA,"\\."))))<nbFactors){ | |
512 #factor B is the new factor to include in intreraction set | |
513 for(iFactorB in (max(as.integer(unlist(strsplit(iFactorA,"\\."))))+1):nbFactors){ | |
514 nameFactorB=names(factorsList)[iFactorB] | |
515 #keep contrasts identified trough interacting factors set | |
516 newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]]=c() | |
517 for(iCompA in compA){ | |
518 for(levelB in setdiff(levels(factorsList[[iFactorB]]),controlGroup[nameFactorB])){ | |
519 #decompose the contrast compA to apply the new level of factor B on each term | |
520 temp=unlist(strsplit(iCompA,"[ + ]")) | |
521 splitCompA=temp[1] | |
522 for(iTemp in temp[-1])splitCompA=c(splitCompA,"+",iTemp) | |
523 splitCompA=unlist(lapply(splitCompA,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB})) | |
524 #apply on each contrast term the new level of factor B | |
525 firstTerm=paste(unlist(lapply(splitCompA,function(x)if(x!="+" && x!="-"){paste(x,levelB,sep=".")}else{x})),collapse="") | |
526 secondTerm=negativeExpression(paste(unlist(lapply(splitCompA,function(x)if(x!="+" && x!="-"){paste(x,controlGroup[nameFactorB],sep=".")}else{x})),collapse="")) | |
527 currentContrast=paste(c(firstTerm,secondTerm),collapse="") | |
528 | |
529 newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]]=c(newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]],currentContrast) | |
530 } | |
531 } | |
532 } | |
533 } | |
534 } | |
535 for(iContrast in names(newPreviousLoopContrast)){ | |
536 contrastClass=c(contrastClass,rep(iContrast,length(newPreviousLoopContrast[[iContrast]]))) | |
537 } | |
538 interactionContrasts=c(interactionContrasts,unlist(newPreviousLoopContrast)) | |
539 } | |
540 #make human title for interactions | |
541 names(interactionContrasts)=contrastClass | |
542 humanReadingInteraction=unlist(lapply(interactionContrasts,function(x)gsub("\\.",":",unlist(strsplit(x,"[+-]"))[1]))) | |
543 | |
544 contrastToIgnore=c() | |
545 | |
546 #complete with control groups and order to match to coeffs | |
547 for(iContrast in 1:length(interactionContrasts)){ | |
548 missingFactor=setdiff(1:nbFactors,as.integer(unlist(strsplit(names(interactionContrasts[iContrast]),"\\.")))) | |
549 #decompose the contrast | |
550 temp=unlist(strsplit(interactionContrasts[iContrast],"[ + ]")) | |
551 splitContrast=temp[1] | |
552 for(iTemp in temp[-1])splitContrast=c(splitContrast,"+",iTemp) | |
553 splitContrast=unlist(lapply(splitContrast,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB})) | |
554 for(iFactor in missingFactor){ | |
555 for(iTerm in 1:length(splitContrast)){ | |
556 if(splitContrast[iTerm]!="+" && splitContrast[iTerm]!="-"){ | |
557 splitTerm=unlist(strsplit(splitContrast[iTerm],"\\.")) | |
558 if(iFactor==1)splitContrast[iTerm]=paste(c(controlGroup[names(factorsList)[iFactor]],splitTerm),collapse=".") | |
559 if(iFactor==nbFactors)splitContrast[iTerm]=paste(c(splitTerm,controlGroup[names(factorsList)[iFactor]]),collapse=".") | |
560 if(iFactor>1 && iFactor<nbFactors)splitContrast[iTerm]=paste(c(splitTerm[1:(iFactor-1)],controlGroup[names(factorsList)[iFactor]],splitTerm[iFactor:length(splitTerm)]),collapse=".") | |
561 } | |
562 } | |
563 } | |
564 interactionContrasts[iContrast]=paste(splitContrast,collapse="") | |
565 if(all(splitContrast[seq(1,length(splitContrast),2)]%in%coeffMeaning)){ | |
566 addComment(c("[INFO]Interaction contrast added : ",humanReadingInteraction[iContrast]),T,opt$log,display=F) | |
567 addComment(c("with complete formula ",interactionContrasts[iContrast]),T,opt$log,display=F) | |
568 }else{ | |
569 contrastToIgnore=c(contrastToIgnore,iContrast) | |
570 addComment(c("[WARNING]Interaction contrast",humanReadingInteraction[iContrast],"is removed due to empty group"),T,opt$log,display=F) | |
571 } | |
572 } | |
573 | |
574 #add interaction contrasts to global contrast list | |
575 if(length(contrastToIgnore)>0){ | |
576 requiredContrasts=c(requiredContrasts,interactionContrasts[-contrastToIgnore]) | |
577 humanReadingContrasts=c(humanReadingContrasts,humanReadingInteraction[-contrastToIgnore]) | |
578 persoContrastName=c(persoContrastName,rep("",length(humanReadingInteraction[-contrastToIgnore]))) | |
579 }else{ | |
580 requiredContrasts=c(requiredContrasts,interactionContrasts) | |
581 humanReadingContrasts=c(humanReadingContrasts,humanReadingInteraction) | |
582 persoContrastName=c(persoContrastName,rep("",length(humanReadingInteraction))) | |
583 } | |
584 }#end of intreaction contrasts | |
585 | |
586 | |
587 #remove from requiredContrasts contrasts that cannot be estimated | |
588 toRemove=unique(unlist(lapply(setdiff(coeffMeaning,names(estimatedCoeff)),function(x)grep(x,requiredContrasts)))) | |
589 if(length(toRemove)>0){ | |
590 addComment(c("[WARNING]",length(toRemove)," contrasts are removed, due to missing coefficients"),T,opt$log,display=FALSE) | |
591 requiredContrasts=requiredContrasts[-toRemove] | |
592 humanReadingContrasts=humanReadingContrasts[-toRemove] | |
593 persoContrastName=persoContrastName[-toRemove] | |
594 } | |
595 | |
596 if(length(requiredContrasts)==0){ | |
597 addComment("[ERROR]No contrast to compute, please check your contrast definition.",T,opt$log) | |
598 q( "no", 1, F ) | |
599 } | |
600 | |
601 #compute for each contrast mean of coefficients in posGroup and negGroup for FC computation of log(FC) with LSmean as in Partek | |
602 meanPosGroup=list() | |
603 meanNegGroup=list() | |
604 for(iContrast in 1:length(requiredContrasts)){ | |
605 #define posGroup and negGroup | |
606 #first split contrast | |
607 temp=unlist(strsplit(requiredContrasts[iContrast],"[ + ]")) | |
608 splitContrast=temp[1] | |
609 for(iTemp in temp[-1])splitContrast=c(splitContrast,"+",iTemp) | |
610 splitContrast=unlist(lapply(splitContrast,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB})) | |
611 #and then put each term in good group | |
612 posGroup=c() | |
613 negGroup=c() | |
614 nextIsPos=TRUE | |
615 for(iSplit in splitContrast){ | |
616 if(iSplit=="+")nextIsPos=TRUE | |
617 if(iSplit=="-")nextIsPos=FALSE | |
618 if(iSplit!="-" && iSplit!="+"){ | |
619 #remove weights of contrast terms | |
620 iSplitBis=unlist(strsplit(iSplit,"[*]")) | |
621 iSplitBis=iSplitBis[length(iSplitBis)] | |
622 if(nextIsPos)posGroup=c(posGroup,iSplitBis) | |
623 else negGroup=c(negGroup,iSplitBis) | |
624 } | |
625 } | |
626 #compute means for each group | |
627 meanPosGroup[[iContrast]]=apply(as.matrix(data.fit$coefficients[,posGroup],ncol=length(posGroup)),1,mean) | |
628 meanNegGroup[[iContrast]]=apply(as.matrix(data.fit$coefficients[,negGroup],ncol=length(negGroup)),1,mean) | |
629 } | |
630 | |
631 | |
632 contrast.matrix = makeContrasts(contrasts=requiredContrasts,levels=design) | |
633 data.fit.con = contrasts.fit(data.fit,contrast.matrix) | |
634 | |
635 addComment("[INFO]Contrast definition done",T,opt$log,T,display=FALSE) | |
636 | |
637 #compute LIMMA statistics | |
638 data.fit.eb = eBayes(data.fit.con) | |
639 | |
640 addComment("[INFO]Estimation done",T,opt$log,T,display=FALSE) | |
641 | |
642 #adjust p.value through FDR | |
643 data.fit.eb$adj_p.value=data.fit.eb$p.value | |
644 for(iComparison in 1:ncol(data.fit.eb$adj_p.value)){ | |
645 data.fit.eb$adj_p.value[,iComparison]=p.adjust(data.fit.eb$p.value[,iComparison],"fdr") | |
646 } | |
647 | |
648 #add a new field based on LS-means for each contrast instead of global mean like they were calculated in coefficients field | |
649 data.fit.eb$coefficientsLS=data.fit.eb$coefficients | |
650 if(ncol(data.fit.eb$coefficients)!=length(meanPosGroup)){ | |
651 addComment("[ERROR]Estimated contrasts number unexpected",T,opt$log) | |
652 q( "no", 1, F ) | |
653 } | |
654 for(iContrast in 1:length(meanPosGroup)){ | |
655 data.fit.eb$coefficientsLS[,iContrast]=meanPosGroup[[iContrast]][rownames(data.fit.eb$coefficientsLS)]-meanNegGroup[[iContrast]][rownames(data.fit.eb$coefficientsLS)] | |
656 } | |
657 | |
658 #if requested replace coefficient computed as global mean by LS-means values | |
659 if(useLSmean)data.fit.eb$coefficients=data.fit.eb$coefficientsLS | |
660 | |
661 addComment("[INFO]Core treatment done",T,opt$log,T,display=FALSE) | |
662 | |
663 | |
664 ##convert humanReadingContrasts with namingDictionary to create humanReadingContrastsRenamed and keep original humanReadingContrasts names for file names | |
665 humanReadingContrastsRenamed=rep("",length(humanReadingContrasts)) | |
666 for(iContrast in 1:length(humanReadingContrasts)){ | |
667 if(persoContrastName[iContrast]==""){ | |
668 #if(verbose)addComment(humanReadingContrasts[iContrast]) | |
669 specialCharacters=str_extract_all(humanReadingContrasts[iContrast],"[+|*|_|:|-]")[[1]] | |
670 #if(verbose)addComment(specialCharacters) | |
671 nameConverted=unlist(lapply(strsplit(humanReadingContrasts[iContrast],"[+|*|_|:|-]")[[1]],function(x)renamingDico[x,1])) | |
672 #if(verbose)addComment(nameConverted) | |
673 humanReadingContrastsRenamed[iContrast]=paste(nameConverted,specialCharacters,collapse="",sep="") | |
674 #if(verbose)addComment(humanReadingContrastsRenamed[iContrast]) | |
675 humanReadingContrastsRenamed[iContrast]=substr(humanReadingContrastsRenamed[iContrast],1,nchar(humanReadingContrastsRenamed[iContrast])-1) | |
676 }else{ | |
677 humanReadingContrastsRenamed[iContrast]=persoContrastName[iContrast] | |
678 } | |
679 } | |
680 | |
681 #write correspondances between plot file names (humanReadingContrasts) and displayed names in figure legends (humanReadingContrastsRenamed), usefull to define html items in xml file | |
682 correspondanceTable=matrix("",ncol=2,nrow=ncol(data.fit.eb$p.value)) | |
683 correspondanceTable[,1]=unlist(lapply(humanReadingContrasts,function(x)gsub(":","_INT_",gsub("\\+","_PLUS_",gsub("\\*","_AND_",x))))) | |
684 correspondanceTable[,2]=humanReadingContrastsRenamed | |
685 rownames(correspondanceTable)=correspondanceTable[,2] | |
686 write.table(correspondanceTable,file=file.path(getwd(), "correspondanceFileNames.csv"),quote=FALSE,sep="\t",col.names = F,row.names = F) | |
687 | |
688 #plot nominal p-val histograms for selected comparisons | |
689 histogramPerPage=6 | |
690 if (!is.null(opt$histo)) { | |
691 iToPlot=1 | |
692 plotVector=list() | |
693 nbComparisons=ncol(data.fit.eb$p.value) | |
694 for (iComparison in 1:nbComparisons){ | |
695 dataToPlot=data.frame(pval=data.fit.eb$p.value[,iComparison],id=rownames(data.fit.eb$p.value)) | |
696 p <- ggplot(data=dataToPlot, aes(x=pval)) + geom_histogram(colour="red", fill="salmon") + | |
697 theme_bw() + ggtitle(humanReadingContrastsRenamed[iComparison]) + ylab(label="Frequencies") + xlab(label="Nominal p-val") + | |
698 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5)) | |
699 plotVector[[length(plotVector)+1]]=p | |
700 | |
701 pp <- ggplotly(p) | |
702 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$histo,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".html"),collapse=""),selfcontained = F) | |
703 | |
704 if(iComparison==nbComparisons || length(plotVector)==histogramPerPage){ | |
705 #plot and close the actual plot | |
706 if(opt$format=="pdf"){ | |
707 pdf(paste(c("./plotDir/",opt$histo,iToPlot,".pdf"),collapse=""))}else{ | |
708 png(paste(c("./plotDir/",opt$histo,iToPlot,".png"),collapse="")) | |
709 } | |
710 multiplot(plotlist=plotVector,cols=2) | |
711 dev.off() | |
712 if(iComparison<nbComparisons){ | |
713 #prepare for a new plotting file if necessary | |
714 plotVector=list() | |
715 iToPlot=iToPlot+1 | |
716 } | |
717 } | |
718 } | |
719 addComment("[INFO]Histograms drawn",T,opt$log,T,display=FALSE) | |
720 | |
721 } | |
722 | |
723 #plot F-test sum square barplot | |
724 if(!is.null(allFtestMeanSquare)){ | |
725 dataToPlot=data.frame(Fratio=apply(allFtestMeanSquare,2,mean),Factors=factor(colnames(allFtestMeanSquare),levels = colnames(allFtestMeanSquare))) | |
726 | |
727 p <- ggplot(data=dataToPlot, aes(x=Factors, y=Fratio, fill=Factors)) + | |
728 geom_bar(stat="identity") + scale_fill_manual(values = colorRampPalette(brewer.pal(9,"Set1"))(ncol(allFtestMeanSquare))[sample(ncol(allFtestMeanSquare))]) + ylab(label="mean F-ratio") + | |
729 theme_bw() + theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5)) + ggtitle("Source of variation") | |
730 | |
731 if(opt$format=="pdf"){ | |
732 pdf(paste(c("./plotDir/",opt$fratioFile,".pdf"),collapse=""))}else{ | |
733 png(paste(c("./plotDir/",opt$fratioFile,".png"),collapse="")) | |
734 } | |
735 plot(p) | |
736 dev.off() | |
737 | |
738 pp <- ggplotly(p) | |
739 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$fratioFile,".html"),collapse=""),selfcontained = F) | |
740 | |
741 addComment("[INFO]SumSquareTest drawn",T,opt$log,T,display=FALSE) | |
742 } | |
743 | |
744 #plot VOLCANO plot | |
745 #volcanoplot(data.fit.eb,coef=1,highlight=10) | |
746 volcanoPerPage=1 | |
747 logFCthreshold=log2(opt$thresholdFC) | |
748 if (!is.null(opt$volcano)) { | |
749 iToPlot=1 | |
750 plotVector=list() | |
751 nbComparisons=ncol(data.fit.eb$adj_p.value) | |
752 for (iComparison in 1:nbComparisons){ | |
753 | |
754 #define the log10(p-val) threshold corresponding to FDR threshold fixed by user | |
755 probeWithLowFDR=-log10(data.fit.eb$p.value[which(data.fit.eb$adj_p.value[,iComparison]<=opt$fdrThreshold),iComparison]) | |
756 pvalThresholdFDR=NULL | |
757 if(length(probeWithLowFDR)>0)pvalThresholdFDR=min(probeWithLowFDR) | |
758 | |
759 #get significant points over FC and FDR thresholds | |
760 significativePoints=intersect(which(abs(data.fit.eb$coefficients[,iComparison])>=logFCthreshold),which(data.fit.eb$adj_p.value[,iComparison]<=opt$fdrThreshold)) | |
761 | |
762 #to reduce size of html plot, we keep 20000 points maximum sampled amongst genes with pval>=33%(pval) and abs(log2(FC))<=66%(abs(log2(FC))) | |
763 htmlPointsToRemove=intersect(which(abs(data.fit.eb$coefficients[,iComparison])<=quantile(abs(data.fit.eb$coefficients[,iComparison]),c(0.66))),which(data.fit.eb$p.value[,iComparison]>=quantile(abs(data.fit.eb$p.value[,iComparison]),c(0.33)))) | |
764 if(length(htmlPointsToRemove)>20000){ | |
765 htmlPointsToRemove=setdiff(htmlPointsToRemove,sample(htmlPointsToRemove,20000)) | |
766 }else{ | |
767 htmlPointsToRemove=c() | |
768 } | |
769 | |
770 xMinLimPlot=min(data.fit.eb$coefficients[,iComparison])-0.2 | |
771 xMaxLimPlot=max(data.fit.eb$coefficients[,iComparison])+0.2 | |
772 yMaxLimPlot= max(-log10(data.fit.eb$p.value[,iComparison]))+0.2 | |
773 | |
774 if(length(significativePoints)>0){ | |
775 dataSignifToPlot=data.frame(pval=-log10(data.fit.eb$p.value[significativePoints,iComparison]),FC=data.fit.eb$coefficients[significativePoints,iComparison],description=paste(names(data.fit.eb$coefficients[significativePoints,iComparison]),"\n","FC: " , round(2^data.fit.eb$coefficients[significativePoints,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[significativePoints,iComparison],digits=4), sep="")) | |
776 #to test if remains any normal points to draw | |
777 if(length(significativePoints)<nrow(data.fit.eb$p.value)){ | |
778 dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[-significativePoints,iComparison]),FC=data.fit.eb$coefficients[-significativePoints,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[-significativePoints,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[-significativePoints,iComparison],digits=4), sep="")) | |
779 }else{ | |
780 dataToPlot=data.frame(pval=0,FC=0,description="null") | |
781 } | |
782 }else{ | |
783 dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[,iComparison]),FC=data.fit.eb$coefficients[,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[,iComparison],digits=4), sep="")) | |
784 } | |
785 | |
786 ##traditional plot | |
787 p <- ggplot(data=dataToPlot, aes(x=FC, y=pval)) + geom_point() + | |
788 theme_bw() + ggtitle(humanReadingContrastsRenamed[iComparison]) + ylab(label="-log10(p-val)") + xlab(label="Log2 Fold Change") + | |
789 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position="none") | |
790 if(logFCthreshold!=0) p <- p + geom_vline(xintercept=-logFCthreshold, color="salmon",linetype="dotted", size=1) + geom_vline(xintercept=logFCthreshold, color="salmon",linetype="dotted", size=1) + geom_text(data.frame(text=c(paste(c("log2(1/FC=",opt$thresholdFC,")"),collapse=""),paste(c("log2(FC=",opt$thresholdFC,")"),collapse="")),x=c(-logFCthreshold,logFCthreshold),y=c(0,0)),mapping=aes(x=x, y=y, label=text), size=4, angle=90, vjust=-0.4, hjust=0, color="salmon") | |
791 if(!is.null(pvalThresholdFDR)) p <- p + geom_hline(yintercept=pvalThresholdFDR, color="skyblue1",linetype="dotted", size=0.5) + geom_text(data.frame(text=c(paste(c("FDR pval limit(",opt$fdrThreshold,")"),collapse="")),x=c(xMinLimPlot),y=c(pvalThresholdFDR)),mapping=aes(x=x, y=y, label=text), size=4, vjust=0, hjust=0, color="skyblue3") | |
792 if(length(significativePoints)>0)p <- p + geom_point(data=dataSignifToPlot,aes(colour=description)) | |
793 | |
794 ##interactive plot | |
795 if(length(htmlPointsToRemove)>0){ | |
796 pointToRemove=union(htmlPointsToRemove,significativePoints) | |
797 #to test if remains any normal points to draw | |
798 if(length(pointToRemove)<nrow(data.fit.eb$p.value)){ | |
799 dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[-pointToRemove,iComparison]),FC=data.fit.eb$coefficients[-pointToRemove,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[-pointToRemove,iComparison],2) , " | FDR p-val: ", prettyNum(data.fit.eb$adj_p.value[-pointToRemove,iComparison],digits=4), sep="")) | |
800 }else{ | |
801 dataToPlot=data.frame(pval=0,FC=0,description="null") | |
802 } | |
803 } | |
804 | |
805 if((nrow(dataToPlot)+nrow(dataSignifToPlot))>40000)addComment(c("[WARNING]For",humanReadingContrastsRenamed[iComparison],"volcano, numerous points to plot(",nrow(dataToPlot)+nrow(dataSignifToPlot),"), resulting volcano could be heavy, using more stringent thresholds could be helpful."),T,opt$log) | |
806 | |
807 phtml <- plot_ly(data=dataToPlot, x=~FC, y=~pval,type="scatter", mode="markers",showlegend = FALSE, marker = list(color="gray",opacity=0.5), text=~description, hoverinfo="text") %>% | |
808 layout(title = humanReadingContrastsRenamed[iComparison],xaxis=list(title="Log2 Fold Change",showgrid=TRUE, zeroline=FALSE),yaxis=list(title="-log10(p-val)", showgrid=TRUE, zeroline=FALSE)) | |
809 if(length(significativePoints)>0) phtml=add_markers(phtml,data=dataSignifToPlot, x=~FC, y=~pval, mode="markers" , marker=list( color=log10(abs(dataSignifToPlot$FC)*dataSignifToPlot$pval),colorscale='Rainbow'), text=~description, hoverinfo="text", inherit = FALSE) %>% hide_colorbar() | |
810 if(logFCthreshold!=0){ | |
811 phtml=add_trace(phtml,x=c(-logFCthreshold,-logFCthreshold), y=c(0,yMaxLimPlot), type="scatter", mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE) | |
812 phtml=add_annotations(phtml,x=-logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(1/FC=",opt$thresholdFC,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral")) | |
813 phtml=add_trace(phtml,x=c(logFCthreshold,logFCthreshold), y=c(0, yMaxLimPlot), type="scatter", mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE) | |
814 phtml=add_annotations(phtml,x=logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(FC=",opt$thresholdFC,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral")) | |
815 } | |
816 if(!is.null(pvalThresholdFDR)){ | |
817 phtml=add_trace(phtml,x=c(xMinLimPlot,xMaxLimPlot), y=c(pvalThresholdFDR,pvalThresholdFDR), type="scatter", mode = "lines", line=list(color="cornflowerblue",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE) | |
818 phtml=add_annotations(phtml,x=xMinLimPlot,y=pvalThresholdFDR+0.1,xref = "x",yref = "y",text = paste(c("FDR pval limit(",opt$fdrThreshold,")"),collapse=""),xanchor = 'left',showarrow = F,font=list(color="cornflowerblue")) | |
819 } | |
820 plotVector[[length(plotVector)+1]]=p | |
821 | |
822 #save plotly files | |
823 pp <- ggplotly(phtml) | |
824 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".html"),collapse=""),selfcontained = F) | |
825 | |
826 | |
827 if(iComparison==nbComparisons || length(plotVector)==volcanoPerPage){ | |
828 #plot and close the actual plot | |
829 if(opt$format=="pdf"){ | |
830 pdf(paste(c("./plotDir/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".pdf"),collapse=""))}else{ | |
831 png(paste(c("./plotDir/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".png"),collapse="")) | |
832 } | |
833 multiplot(plotlist=plotVector,cols=1) | |
834 dev.off() | |
835 if(iComparison<nbComparisons){ | |
836 #prepare for a new ploting file if necessary | |
837 plotVector=list() | |
838 iToPlot=iToPlot+1 | |
839 } | |
840 } | |
841 } | |
842 remove(dataToPlot,dataSignifToPlot) | |
843 addComment("[INFO]Volcanos drawn",T,opt$log,T,display=FALSE) | |
844 } | |
845 | |
846 rowItemInfo=NULL | |
847 if(!is.null(opt$rowNameType) && !is.null(opt$organismID)){ | |
848 ##get gene information from BioMart | |
849 #if(!require("biomaRt")){ | |
850 # source("https://bioconductor.org/biocLite.R") | |
851 # biocLite("biomaRt") | |
852 #} | |
853 | |
854 ensembl_hs_mart <- useMart(biomart="ensembl", dataset=opt$organismID) | |
855 ensembl_df <- getBM(attributes=c(opt$rowNameType,"description"),mart=ensembl_hs_mart) | |
856 rowItemInfo=ensembl_df[which(ensembl_df[,1]!=""),2] | |
857 rowItemInfo=unlist(lapply(rowItemInfo,function(x)substr(unlist(strsplit(x," \\[Source"))[1],1,30))) | |
858 names(rowItemInfo)=ensembl_df[which(ensembl_df[,1]!=""),1] | |
859 } | |
860 | |
861 #write(unlist(dimnames(data.fit.eb$adj_p.value)),opt$log,append = T) | |
862 | |
863 #prepare additional output containing df informations | |
864 dfMatrix=matrix(0,ncol=3,nrow = nrow(data.fit.eb$coefficients),dimnames = list(rownames(data.fit.eb$coefficients),c("df.residual","df.prior","df.total"))) | |
865 dfMatrix[,"df.residual"]=data.fit.eb$df.residual | |
866 dfMatrix[,"df.prior"]=data.fit.eb$df.prior | |
867 dfMatrix[,"df.total"]=data.fit.eb$df.total | |
868 | |
869 #filter out genes with higher p-values for all comparisons | |
870 genesToKeep=names(which(apply(data.fit.eb$adj_p.value,1,function(x)length(which(x<=opt$fdrThreshold))>0))) | |
871 #filter out genes with lower FC for all comparisons | |
872 genesToKeep=intersect(genesToKeep,names(which(apply(data.fit.eb$coefficients,1,function(x)length(which(abs(x)>=logFCthreshold))>0)))) | |
873 | |
874 if(length(genesToKeep)>0){ | |
875 data.fit.eb$adj_p.value=matrix(data.fit.eb$adj_p.value[genesToKeep,],ncol=ncol(data.fit.eb$adj_p.value)) | |
876 rownames(data.fit.eb$adj_p.value)=genesToKeep | |
877 colnames(data.fit.eb$adj_p.value)=colnames(data.fit.eb$p.value) | |
878 | |
879 data.fit.eb$p.value=matrix(data.fit.eb$p.value[genesToKeep,],ncol=ncol(data.fit.eb$p.value)) | |
880 rownames(data.fit.eb$p.value)=genesToKeep | |
881 colnames(data.fit.eb$p.value)=colnames(data.fit.eb$adj_p.value) | |
882 | |
883 data.fit.eb$coefficients=matrix(data.fit.eb$coefficients[genesToKeep,],ncol=ncol(data.fit.eb$coefficients)) | |
884 rownames(data.fit.eb$coefficients)=genesToKeep | |
885 colnames(data.fit.eb$coefficients)=colnames(data.fit.eb$adj_p.value) | |
886 | |
887 data.fit.eb$t=matrix(data.fit.eb$t[genesToKeep,],ncol=ncol(data.fit.eb$t)) | |
888 rownames(data.fit.eb$t)=genesToKeep | |
889 colnames(data.fit.eb$t)=colnames(data.fit.eb$adj_p.value) | |
890 | |
891 dfMatrix=dfMatrix[genesToKeep,,drop=FALSE] | |
892 | |
893 }else{ | |
894 addComment(c("[WARNING]No significative genes considering the given FDR threshold : ",opt$fdrThreshold),T,opt$log,display=FALSE) | |
895 } | |
896 | |
897 addComment("[INFO]Significant genes filtering done",T,opt$log,T,display=FALSE) | |
898 | |
899 | |
900 #plot VennDiagramm for genes below threshold between comparisons | |
901 #t=apply(data.fit.eb$adj_p.value[,1:4],2,function(x)names(which(x<=opt$threshold))) | |
902 #get.venn.partitions(t) | |
903 #vennCounts(data.fit.eb$adj_p.value[,1:4]<=opt$threshold) | |
904 | |
905 #make a simple sort genes based only on the first comparison | |
906 #newOrder=order(data.fit.eb$adj_p.value[,1]) | |
907 #data.fit.eb$adj_p.value=data.fit.eb$adj_p.value[newOrder,] | |
908 | |
909 #alternative sorting strategy based on the mean gene rank over all comparisons | |
910 if(length(genesToKeep)>1){ | |
911 currentRank=rep(0,nrow(data.fit.eb$adj_p.value)) | |
912 for(iComparison in 1:ncol(data.fit.eb$adj_p.value)){ | |
913 currentRank=currentRank+rank(data.fit.eb$adj_p.value[,iComparison]) | |
914 } | |
915 currentRank=currentRank/ncol(data.fit.eb$adj_p.value) | |
916 newOrder=order(currentRank) | |
917 | |
918 data.fit.eb$adj_p.value=matrix(data.fit.eb$adj_p.value[newOrder,],ncol=ncol(data.fit.eb$adj_p.value)) | |
919 rownames(data.fit.eb$adj_p.value)=rownames(data.fit.eb$p.value)[newOrder] | |
920 colnames(data.fit.eb$adj_p.value)=colnames(data.fit.eb$p.value) | |
921 | |
922 data.fit.eb$p.value=matrix(data.fit.eb$p.value[newOrder,],ncol=ncol(data.fit.eb$p.value)) | |
923 rownames(data.fit.eb$p.value)=rownames(data.fit.eb$adj_p.value) | |
924 colnames(data.fit.eb$p.value)=colnames(data.fit.eb$adj_p.value) | |
925 | |
926 data.fit.eb$coefficients=matrix(data.fit.eb$coefficients[newOrder,],ncol=ncol(data.fit.eb$coefficients)) | |
927 rownames(data.fit.eb$coefficients)=rownames(data.fit.eb$adj_p.value) | |
928 colnames(data.fit.eb$coefficients)=colnames(data.fit.eb$adj_p.value) | |
929 | |
930 data.fit.eb$t=matrix(data.fit.eb$t[newOrder,],ncol=ncol(data.fit.eb$t)) | |
931 rownames(data.fit.eb$t)=rownames(data.fit.eb$adj_p.value) | |
932 colnames(data.fit.eb$t)=colnames(data.fit.eb$adj_p.value) | |
933 | |
934 dfMatrix=dfMatrix[newOrder,,drop=FALSE] | |
935 } | |
936 | |
937 | |
938 #formating output matrices depending on genes to keep | |
939 if(length(genesToKeep)==0){ | |
940 outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3) | |
941 outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5)) | |
942 outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value))) | |
943 outputData[,1]=c("LIMMA","Gene","noGene") | |
944 outputData[,2]=c("Comparison","Info","noInfo") | |
945 | |
946 outputDfData=matrix(0,ncol=3+1,nrow=2) | |
947 outputDfData[1,]=c("X","df.residual","df.prior","df.total") | |
948 outputDfData[,1]=c("Statistics","noGene") | |
949 }else{ | |
950 if(length(genesToKeep)==1){ | |
951 outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3) | |
952 outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5)) | |
953 outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value))) | |
954 outputData[,1]=c("LIMMA","Gene",genesToKeep) | |
955 outputData[,2]=c("Comparison","Info","na") | |
956 if(!is.null(rowItemInfo))outputData[3,2]=rowItemInfo[genesToKeep] | |
957 outputData[3,seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4) | |
958 outputData[3,seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4) | |
959 outputData[3,seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4) | |
960 outputData[3,seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4) | |
961 outputData[3,seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4) | |
962 | |
963 outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix)) | |
964 outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total") | |
965 outputDfData[2,]=c(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual","df.prior","df.total")],digits=4)) | |
966 }else{ | |
967 #format matrix to be correctly read by galaxy (move headers in first column and row) | |
968 outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=nrow(data.fit.eb$adj_p.value)+2) | |
969 outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5)) | |
970 outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value))) | |
971 outputData[,1]=c("LIMMA","Gene",rownames(data.fit.eb$adj_p.value)) | |
972 outputData[,2]=c("Comparison","Info",rep("na",nrow(data.fit.eb$adj_p.value))) | |
973 if(!is.null(rowItemInfo))outputData[3:nrow(outputData),2]=rowItemInfo[rownames(data.fit.eb$adj_p.value)] | |
974 outputData[3:nrow(outputData),seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4) | |
975 outputData[3:nrow(outputData),seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4) | |
976 outputData[3:nrow(outputData),seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4) | |
977 outputData[3:nrow(outputData),seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4) | |
978 outputData[3:nrow(outputData),seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4) | |
979 | |
980 outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix)) | |
981 outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total") | |
982 outputDfData[2:(1+nrow(dfMatrix)),]=cbind(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual")],digits=4),prettyNum(dfMatrix[,c("df.prior")],digits=4),prettyNum(dfMatrix[,c("df.total")],digits=4)) | |
983 } | |
984 } | |
985 addComment("[INFO]Formated output",T,opt$log,display=FALSE) | |
986 | |
987 #write output results | |
988 write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\t",col.names = F,row.names = F) | |
989 | |
990 #write df info file | |
991 write.table(outputDfData,file=opt$outputDfFile,quote=FALSE,sep="\t",col.names = F,row.names = F) | |
992 | |
993 end.time <- Sys.time() | |
994 addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE) | |
995 | |
996 addComment("[INFO]End of R script",T,opt$log,display=FALSE) | |
997 | |
998 printSessionInfo(opt$log) | |
999 #sessionInfo() | |
1000 | |
1001 | |
1002 |