comparison lib.r @ 0:139ff66b0b5d draft

planemo upload commit f69695e76674862ed9c77c1c127f459b4df42464
author workflow4metabolomics
date Fri, 26 Jul 2019 16:49:18 -0400
parents
children ea15115a5b3f
comparison
equal deleted inserted replaced
-1:000000000000 0:139ff66b0b5d
1 # lib.r
2
3 #@author G. Le Corguille
4 # solve an issue with batch if arguments are logical TRUE/FALSE
5 parseCommandArgs <- function(...) {
6 args <- batch::parseCommandArgs(...)
7 for (key in names(args)) {
8 if (args[key] %in% c("TRUE","FALSE"))
9 args[key] = as.logical(args[key])
10 }
11 return(args)
12 }
13
14 #@author G. Le Corguille
15 # This function will
16 # - load the packages
17 # - display the sessionInfo
18 loadAndDisplayPackages <- function(pkgs) {
19 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
20
21 sessioninfo = sessionInfo()
22 cat(sessioninfo$R.version$version.string,"\n")
23 cat("Main packages:\n")
24 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
25 cat("Other loaded packages:\n")
26 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
27 }
28
29 # This function retrieve a xset like object
30 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
31 getxcmsSetObject <- function(xobject) {
32 # XCMS 1.x
33 if (class(xobject) == "xcmsSet")
34 return (xobject)
35 # XCMS 3.x
36 if (class(xobject) == "XCMSnExp") {
37 # Get the legacy xcmsSet object
38 suppressWarnings(xset <- as(xobject, 'xcmsSet'))
39 if (is.null(xset@phenoData$sample_group))
40 sampclass(xset) = "."
41 else
42 sampclass(xset) <- xset@phenoData$sample_group
43 if (!is.null(xset@phenoData$sample_name))
44 rownames(xset@phenoData) = xset@phenoData$sample_name
45 return (xset)
46 }
47 }
48
49 #@author G. Le Corguille
50 #The function create a pdf from the different png generated by diffreport
51 diffreport_png2pdf <- function(filebase) {
52 dir.create("pdf")
53
54 pdfEicOutput = paste0("pdf/",filebase,"-eic_pdf.pdf")
55 pdfBoxOutput = paste0("pdf/",filebase,"-box_pdf.pdf")
56
57 system(paste0("gm convert ",filebase,"_eic/*.png ",pdfEicOutput))
58 system(paste0("gm convert ",filebase,"_box/*.png ",pdfBoxOutput))
59
60 }
61
62 #@author G. Le Corguille
63 #The function create a zip archive from the different png generated by diffreport
64 diffreport_png2zip <- function() {
65 zip("eic.zip", dir(pattern="_eic"), zip=Sys.which("zip"))
66 zip("box.zip", dir(pattern="_box"), zip=Sys.which("zip"))
67 }
68
69 #The function create a zip archive from the different tabular generated by diffreport
70 diffreport_tabular2zip <- function() {
71 zip("tabular.zip", dir(pattern="tabular/*"), zip=Sys.which("zip"))
72 }
73
74 #@author G. Le Corguille
75 #This function convert if it is required the Retention Time in minutes
76 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
77 if (convertRTMinute){
78 #converting the retention times (seconds) into minutes
79 print("converting the retention times into minutes in the variableMetadata")
80 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60
81 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60
82 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60
83 }
84 return (variableMetadata)
85 }
86
87 #@author G. Le Corguille
88 #This function format ions identifiers
89 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
90 splitDeco = strsplit(as.character(variableMetadata$name),"_")
91 idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
92 namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
93 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
94 return(variableMetadata)
95 }
96
97 #The function annotateDiffreport without the corr function which bugs
98 annotatediff <- function(xset=xset, args=args, variableMetadataOutput="variableMetadata.tsv") {
99 # Resolve the bug with x11, with the function png
100 options(bitmapType='cairo')
101
102 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
103 res=try(is.null(xset@filled))
104
105 # ------ annot -------
106 args$calcCiS=as.logical(args$calcCiS)
107 args$calcIso=as.logical(args$calcIso)
108 args$calcCaS=as.logical(args$calcCaS)
109
110 # common parameters
111 args4annotate = list(object=xset,
112 nSlaves=args$nSlaves,sigma=args$sigma,perfwhm=args$perfwhm,
113 maxcharge=args$maxcharge,maxiso=args$maxiso,minfrac=args$minfrac,
114 ppm=args$ppm,mzabs=args$mzabs,quick=args$quick,
115 polarity=args$polarity,max_peaks=args$max_peaks,intval=args$intval)
116
117 # quick == FALSE
118 if(args$quick==FALSE) {
119 args4annotate = append(args4annotate,
120 list(graphMethod=args$graphMethod,cor_eic_th=args$cor_eic_th,pval=args$pval,
121 calcCiS=args$calcCiS,calcIso=args$calcIso,calcCaS=args$calcCaS))
122 # no ruleset
123 if (!is.null(args$multiplier)) {
124 args4annotate = append(args4annotate,
125 list(multiplier=args$multiplier))
126 }
127 # ruleset
128 else {
129 rulset=read.table(args$rules, h=T, sep=";")
130 if (ncol(rulset) < 4) rulset=read.table(args$rules, h=T, sep="\t")
131 if (ncol(rulset) < 4) rulset=read.table(args$rules, h=T, sep=",")
132 if (ncol(rulset) < 4) {
133 error_message="Your ruleset file seems not well formatted. The column separators accepted are ; , and tabulation"
134 print(error_message)
135 stop(error_message)
136 }
137
138 args4annotate = append(args4annotate,
139 list(rules=rulset))
140 }
141 }
142
143
144 # launch annotate
145 xa = do.call("annotate", args4annotate)
146 peakList=getPeaklist(xa,intval=args$intval)
147 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
148
149 # --- Multi condition : diffreport ---
150 diffrepOri=NULL
151 if (!is.null(args$runDiffreport) & nlevels(sampclass(xset))>=2) {
152 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
153 res=try(is.null(xset@filled))
154 classes=levels(sampclass(xset))
155 x=1:(length(classes)-1)
156 for (i in seq(along=x) ) {
157 y=1:(length(classes))
158 for (n in seq(along=y)){
159 if(i+n <= length(classes)){
160 filebase=paste(classes[i],class2=classes[i+n],sep="-vs-")
161
162 diffrep=diffreport(
163 object=xset,class1=classes[i],class2=classes[i+n],
164 filebase=filebase,eicmax=args$eicmax,eicwidth=args$eicwidth,
165 sortpval=TRUE,value=args$value,h=args$h,w=args$w,mzdec=args$mzdec,missing=0)
166
167 diffrepOri = diffrep
168
169 # renamming of the column rtmed to rt to fit with camera peaklist function output
170 colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt"
171 colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz"
172
173 # combines results and reorder columns
174 diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F)
175 diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))])
176
177 diffrep = RTSecondToMinute(diffrep, args$convertRTMinute)
178 diffrep = formatIonIdentifiers(diffrep, numDigitsRT=args$numDigitsRT, numDigitsMZ=args$numDigitsMZ)
179
180 if(args$sortpval){
181 diffrep=diffrep[order(diffrep$pvalue), ]
182 }
183
184 dir.create("tabular", showWarnings = FALSE)
185 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep=""))
186
187 if (args$eicmax != 0) {
188 if (args$png2 == "pdf")
189 diffreport_png2pdf(filebase)
190 }
191 }
192 }
193 }
194 if (args$png2 == "zip")
195 diffreport_png2zip()
196 if (args$tabular2 == "zip")
197 diffreport_tabular2zip()
198 }
199
200 # --- variableMetadata ---
201 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
202 variableMetadata = RTSecondToMinute(variableMetadata, args$convertRTMinute)
203 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=args$numDigitsRT, numDigitsMZ=args$numDigitsMZ)
204 # if we have 2 conditions, we keep stat of diffrep
205 if (!is.null(args$runDiffreport) & nlevels(sampclass(xset))==2) {
206 variableMetadata = merge(variableMetadata, diffrep[,c("name","fold","tstat","pvalue")],by.x="name", by.y="name", sort=F)
207 if(exists("args[[\"sortpval\"]]")){
208 variableMetadata=variableMetadata[order(variableMetadata$pvalue), ]
209 }
210 }
211
212 variableMetadataOri=variableMetadata
213 write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
214
215 return(list("xa"=xa,"diffrep"=diffrepOri,"variableMetadata"=variableMetadataOri));
216
217 }
218
219
220 combinexsAnnos_function <- function(xaP, xaN, diffrepP=NULL,diffrepN=NULL,
221 pos=TRUE,tol=2,ruleset=NULL,keep_meta=TRUE, convertRTMinute=F, numDigitsMZ=0,
222 numDigitsRT=0, variableMetadataOutput="variableMetadata.tsv"){
223
224 #Load the two Rdata to extract the xset objects from positive and negative mode
225 cat("\tObject xset from positive mode\n")
226 print(xaP)
227 cat("\n")
228
229 cat("\tObject xset from negative mode\n")
230 print(xaN)
231 cat("\n")
232
233 cat("\n")
234 cat("\tCombining...\n")
235 #Convert the string to numeric for creating matrix
236 row=as.numeric(strsplit(ruleset,",")[[1]][1])
237 column=as.numeric(strsplit(ruleset,",")[[1]][2])
238 ruleset=cbind(row,column)
239 #Test if the file comes from an older version tool
240 if ((!is.null(xaP)) & (!is.null(xaN))) {
241 #Launch the combinexsannos function from CAMERA
242 cAnnot=combinexsAnnos(xaP, xaN,pos=pos,tol=tol,ruleset=ruleset)
243 } else {
244 stop("You must relauch the CAMERA.annotate step with the lastest version.")
245 }
246
247 if(pos){
248 xa=xaP
249 mode="neg. Mode"
250 } else {
251 xa=xaN
252 mode="pos. Mode"
253 }
254
255 peakList=getPeaklist(xa)
256 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
257 variableMetadata=cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup",mode)]);
258 variableMetadata=variableMetadata[,!(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))]
259
260 #Test if there are more than two classes (conditions)
261 if ( nlevels(sampclass(xaP@xcmsSet))==2 & (!is.null(diffrepN)) & (!is.null(diffrepP))) {
262 diffrepP = diffrepP[,c("name","fold","tstat","pvalue")]; colnames(diffrepP) = paste("P.",colnames(diffrepP),sep="")
263 diffrepN = diffrepN[,c("name","fold","tstat","pvalue")]; colnames(diffrepN) = paste("N.",colnames(diffrepN),sep="")
264
265 variableMetadata = merge(variableMetadata, diffrepP, by.x="name", by.y="P.name")
266 variableMetadata = merge(variableMetadata, diffrepN, by.x="name", by.y="N.name")
267 }
268
269 rownames(variableMetadata) = NULL
270 #TODO: checker
271 #colnames(variableMetadata)[1:2] = c("name","mz/rt");
272
273 variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute)
274 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
275
276 #If the user want to keep only the metabolites which match a difference
277 if(keep_meta){
278 variableMetadata=variableMetadata[variableMetadata[,c(mode)]!="",]
279 }
280
281 #Write the output into a tsv file
282 write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
283 return(variableMetadata);
284
285 }
286
287 # This function get the raw file path from the arguments
288 getRawfilePathFromArguments <- function(singlefile, zipfile, args) {
289 if (!is.null(args$zipfile)) zipfile = args$zipfile
290 if (!is.null(args$zipfilePositive)) zipfile = args$zipfilePositive
291 if (!is.null(args$zipfileNegative)) zipfile = args$zipfileNegative
292
293 if (!is.null(args$singlefile_galaxyPath)) {
294 singlefile_galaxyPaths = args$singlefile_galaxyPath;
295 singlefile_sampleNames = args$singlefile_sampleName
296 }
297 if (!is.null(args$singlefile_galaxyPathPositive)) {
298 singlefile_galaxyPaths = args$singlefile_galaxyPathPositive;
299 singlefile_sampleNames = args$singlefile_sampleNamePositive
300 }
301 if (!is.null(args$singlefile_galaxyPathNegative)) {
302 singlefile_galaxyPaths = args$singlefile_galaxyPathNegative;
303 singlefile_sampleNames = args$singlefile_sampleNameNegative
304 }
305 if (exists("singlefile_galaxyPaths")){
306 singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,","))
307 singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,","))
308
309 singlefile=NULL
310 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
311 singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i]
312 singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i]
313 singlefile[[singlefile_sampleName]] = singlefile_galaxyPath
314 }
315 }
316 for (argument in c("zipfile", "zipfilePositive", "zipfileNegative",
317 "singlefile_galaxyPath", "singlefile_sampleName",
318 "singlefile_galaxyPathPositive", "singlefile_sampleNamePositive",
319 "singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) {
320 args[[argument]]=NULL
321 }
322 return(list(zipfile=zipfile, singlefile=singlefile, args=args))
323 }
324
325
326 # This function retrieve the raw file in the working directory
327 # - if zipfile: unzip the file with its directory tree
328 # - if singlefiles: set symlink with the good filename
329 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
330 if(!is.null(singlefile) && (length("singlefile")>0)) {
331 for (singlefile_sampleName in names(singlefile)) {
332 singlefile_galaxyPath = singlefile[[singlefile_sampleName]]
333 if(!file.exists(singlefile_galaxyPath)){
334 error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
335 print(error_message); stop(error_message)
336 }
337
338 file.symlink(singlefile_galaxyPath,singlefile_sampleName)
339 }
340 directory = "."
341
342 }
343 if(!is.null(zipfile) && (zipfile!="")) {
344 if(!file.exists(zipfile)){
345 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
346 print(error_message)
347 stop(error_message)
348 }
349
350 #list all file in the zip file
351 #zip_files=unzip(zipfile,list=T)[,"Name"]
352
353 #unzip
354 suppressWarnings(unzip(zipfile, unzip="unzip"))
355
356 #get the directory name
357 filesInZip=unzip(zipfile, list=T);
358 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])));
359 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
360 directory = "."
361 if (length(directories) == 1) directory = directories
362
363 cat("files_root_directory\t",directory,"\n")
364
365 }
366 return (directory)
367 }
368
369 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
370 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524
371 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd
372 ############################################################
373 ## getEIC
374 getEIC <- function(object, mzrange, rtrange = 200,
375 groupidx, sampleidx = sampnames(object),
376 rt = c("corrected", "raw")) {
377
378 files <- filepaths(object)
379 grp <- groups(object)
380 samp <- sampnames(object)
381 prof <- profinfo(object)
382
383 rt <- match.arg(rt)
384
385 if (is.numeric(sampleidx))
386 sampleidx <- sampnames(object)[sampleidx]
387 sampidx <- match(sampleidx, sampnames(object))
388
389 if (!missing(groupidx)) {
390 if (is.numeric(groupidx))
391 groupidx <- groupnames(object)[unique(as.integer(groupidx))]
392 grpidx <- match(groupidx, groupnames(object, template = groupidx))
393 }
394
395 if (missing(mzrange)) {
396 if (missing(groupidx))
397 stop("No m/z range or groups specified")
398 if (any(is.na(groupval(object, value = "mz"))))
399 warning(
400 "`NA` values in xcmsSet. Use fillPeaks() on the object to fill",
401 "-in missing peak values. Note however that this will also ",
402 "insert intensities of 0 for peaks that can not be filled in.")
403 mzmin <- apply(groupval(object, value = "mzmin"), 1, min, na.rm = TRUE)
404 mzmax <- apply(groupval(object, value = "mzmax"), 1, max, na.rm = TRUE)
405 mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2)
406 ## if (any(is.na(groupval(object, value = "mz"))))
407 ## stop('Please use fillPeaks() to fill up NA values !')
408 ## mzmin <- -rowMax(-groupval(object, value = "mzmin"))
409 ## mzmax <- rowMax(groupval(object, value = "mzmax"))
410 ## mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2)
411 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange)))
412 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE]
413 else if (is.null(dim(mzrange)))
414 stop("mzrange must be a matrix")
415 colnames(mzrange) <- c("mzmin", "mzmax")
416
417 if (length(rtrange) == 1) {
418 if (missing(groupidx))
419 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)),
420 ncol = 2, byrow = TRUE)
421 else {
422 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange)
423 }
424 } else if (is.null(dim(rtrange)))
425 stop("rtrange must be a matrix or single number")
426 colnames(rtrange) <- c("rtmin", "rtmax")
427
428 ## Ensure that we've got corrected retention time if requested.
429 if (is.null(object@rt[[rt]]))
430 stop(rt, " retention times not present in 'object'!")
431
432 ## Ensure that the defined retention time range is within the rtrange of the
433 ## object: we're using the max minimal rt of all files and the min maximal rt
434 rtrs <- lapply(object@rt[[rt]], range)
435 rtr <- c(max(unlist(lapply(rtrs, "[", 1))),
436 min(unlist(lapply(rtrs, "[", 2))))
437 ## Check if we've got a range which is completely off:
438 if (any(rtrange[, "rtmin"] >= rtr[2] | rtrange[, "rtmax"] <= rtr[1])) {
439 outs <- which(rtrange[, "rtmin"] >= rtr[2] |
440 rtrange[, "rtmax"] <= rtr[1])
441 stop(length(outs), " of the specified 'rtrange' are completely outside ",
442 "of the retention time range of 'object' which is (", rtr[1], ", ",
443 rtr[2], "). The first was: (", rtrange[outs[1], "rtmin"], ", ",
444 rtrange[outs[1], "rtmax"], "!")
445 }
446 lower_rt_outside <- rtrange[, "rtmin"] < rtr[1]
447 upper_rt_outside <- rtrange[, "rtmax"] > rtr[2]
448 if (any(lower_rt_outside) | any(upper_rt_outside)) {
449 ## Silently fix these ranges.
450 rtrange[lower_rt_outside, "rtmin"] <- rtr[1]
451 rtrange[upper_rt_outside, "rtmax"] <- rtr[2]
452 }
453
454 if (missing(groupidx))
455 gnames <- character(0)
456 else
457 gnames <- groupidx
458
459 eic <- vector("list", length(sampleidx))
460 names(eic) <- sampleidx
461
462 for (i in seq(along = sampidx)) {
463
464 ## cat(sampleidx[i], "")
465 flush.console()
466 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other
467 ## stuff.
468 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt)
469 currenteic <- xcms::getEIC(lcraw, mzrange, rtrange, step = prof$step)
470 eic[[i]] <- currenteic@eic[[1]]
471 rm(lcraw)
472 gc()
473 }
474 ## cat("\n")
475
476 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange,
477 rt = rt, groupnames = gnames))
478 }
479
480 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
481 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524
482 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd
483 ############################################################
484 ## diffreport
485 diffreport = function(object,
486 class1 = levels(sampclass(object))[1],
487 class2 = levels(sampclass(object))[2],
488 filebase = character(),
489 eicmax = 0, eicwidth = 200,
490 sortpval = TRUE,
491 classeic = c(class1,class2),
492 value = c("into","maxo","intb"),
493 metlin = FALSE,
494 h = 480, w = 640, mzdec=2,
495 missing = numeric(), ...) {
496
497 if ( nrow(object@groups)<1 || length(object@groupidx) <1) {
498 stop("No group information. Use group().")
499 }
500
501 if (!is.numeric(w) || !is.numeric(h))
502 stop("'h' and 'w' have to be numeric")
503 ## require(multtest) || stop("Couldn't load multtest")
504
505 value <- match.arg(value)
506 groupmat <- groups(object)
507 if (length(groupmat) == 0)
508 stop("No group information found")
509 samples <- sampnames(object)
510 n <- length(samples)
511 classlabel <- sampclass(object)
512 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))]
513
514 values <- groupval(object, "medret", value=value)
515 indecies <- groupval(object, "medret", value = "index")
516
517 if (!all(c(class1,class2) %in% classlabel))
518 stop("Incorrect Class Labels")
519
520 ## c1 and c2 are column indices of class1 and class2 resp.
521 c1 <- which(classlabel %in% class1)
522 c2 <- which(classlabel %in% class2)
523 ceic <- which(classlabel %in% classeic)
524 if (length(intersect(c1, c2)) > 0)
525 stop("Intersecting Classes")
526
527 ## Optionally replace NA values with the value provided with missing
528 if (length(missing)) {
529 if (is.numeric(missing)) {
530 ## handles NA, Inf and -Inf
531 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1]
532 } else
533 stop("'missing' should be numeric")
534 }
535 ## Check against missing Values
536 if (any(is.na(values[, c(c1, c2)])))
537 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill",
538 "-in missing peak values. Note however that this will also ",
539 "insert intensities of 0 for peaks that can not be filled in.")
540
541 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE)
542 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE)
543
544 ## Calculate fold change.
545 ## For foldchange <1 set fold to 1/fold
546 ## See tstat to check which was higher
547 fold <- mean2 / mean1
548 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1]
549
550 testval <- values[,c(c1,c2)]
551 ## Replace eventual infinite values with NA (CAMERA issue #33)
552 testval[is.infinite(testval)] <- NA
553 testclab <- c(rep(0,length(c1)),rep(1,length(c2)))
554
555 if (min(length(c1), length(c2)) >= 2) {
556 tstat <- mt.teststat(testval, testclab, ...)
557 pvalue <- xcms:::pval(testval, testclab, tstat)
558 } else {
559 message("Too few samples per class, skipping t-test.")
560 tstat <- pvalue <- rep(NA,nrow(testval))
561 }
562 stat <- data.frame(fold = fold, tstat = tstat, pvalue = pvalue)
563 if (length(levels(sampclass(object))) >2) {
564 pvalAnova<-c()
565 for(i in 1:nrow(values)){
566 var<-as.numeric(values[i,])
567 ano<-summary(aov(var ~ sampclass(object)) )
568 pvalAnova<-append(pvalAnova, unlist(ano)["Pr(>F)1"])
569 }
570 stat<-cbind(stat, anova= pvalAnova)
571 }
572 if (metlin) {
573 neutralmass <- groupmat[,"mzmed"] + ifelse(metlin < 0, 1, -1)
574 metlin <- abs(metlin)
575 digits <- ceiling(-log10(metlin))+1
576 metlinurl <-
577 paste("http://metlin.scripps.edu/simple_search_result.php?mass_min=",
578 round(neutralmass - metlin, digits), "&mass_max=",
579 round(neutralmass + metlin, digits), sep="")
580 values <- cbind(metlin = metlinurl, values)
581 }
582 twosamp <- cbind(name = groupnames(object), stat, groupmat, values)
583 if (sortpval) {
584 tsidx <- order(twosamp[,"pvalue"])
585 twosamp <- twosamp[tsidx,]
586 rownames(twosamp) <- 1:nrow(twosamp)
587 values<-values[tsidx,]
588 } else
589 tsidx <- 1:nrow(values)
590
591 if (length(filebase))
592 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA)
593
594 if (eicmax > 0) {
595 if (length(unique(peaks(object)[,"rt"])) > 1) {
596 ## This looks like "normal" LC data
597
598 eicmax <- min(eicmax, length(tsidx))
599 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic,
600 groupidx = tsidx[seq(length = eicmax)])
601
602 if (length(filebase)) {
603 eicdir <- paste(filebase, "_eic", sep="")
604 boxdir <- paste(filebase, "_box", sep="")
605 dir.create(eicdir)
606 dir.create(boxdir)
607 if (capabilities("png")){
608 xcms:::xcmsBoxPlot(values[seq(length = eicmax),],
609 sampclass(object), dirpath=boxdir, pic="png", width=w, height=h)
610 png(file.path(eicdir, "%003d.png"), width = w, height = h)
611 } else {
612 xcms:::xcmsBoxPlot(values[seq(length = eicmax),],
613 sampclass(object), dirpath=boxdir, pic="pdf", width=w, height=h)
614 pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
615 height = h/72, onefile = FALSE)
616 }
617 }
618 plot(eics, object, rtrange = eicwidth, mzdec=mzdec)
619
620 if (length(filebase))
621 dev.off()
622 } else {
623 ## This looks like a direct-infusion single spectrum
624 if (length(filebase)) {
625 eicdir <- paste(filebase, "_eic", sep="")
626 boxdir <- paste(filebase, "_box", sep="")
627 dir.create(eicdir)
628 dir.create(boxdir)
629 if (capabilities("png")){
630 xcmsBoxPlot(values[seq(length = eicmax),],
631 sampclass(object), dirpath=boxdir, pic="png",
632 width=w, height=h)
633 png(file.path(eicdir, "%003d.png"), width = w, height = h,
634 units = "px")
635 } else {
636 xcmsBoxPlot(values[seq(length = eicmax),],
637 sampclass(object), dirpath=boxdir, pic="pdf",
638 width=w, height=h)
639 pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
640 height = h/72, onefile = FALSE)
641 }
642 }
643
644 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1)
645
646 if (length(filebase))
647 dev.off()
648 }
649 }
650
651 invisible(twosamp)
652 }