Mercurial > repos > lecorguille > camera_annotate
comparison lib.r @ 19:01459b73daf9 draft
"planemo upload commit 4fcbbcbc6d6b0a59e801870d31fe886a920ef429"
author | workflow4metabolomics |
---|---|
date | Thu, 13 Feb 2020 17:23:07 -0500 |
parents | cb923396e70f |
children | b979ba5888f7 |
comparison
equal
deleted
inserted
replaced
18:cb923396e70f | 19:01459b73daf9 |
---|---|
364 | 364 |
365 } | 365 } |
366 return (directory) | 366 return (directory) |
367 } | 367 } |
368 | 368 |
369 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
370 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524 | |
371 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd | |
372 ############################################################ | |
373 ## getEIC | |
374 getEIC <- function(object, mzrange, rtrange = 200, | |
375 groupidx, sampleidx = sampnames(object), | |
376 rt = c("corrected", "raw")) { | |
377 | |
378 files <- filepaths(object) | |
379 grp <- groups(object) | |
380 samp <- sampnames(object) | |
381 prof <- profinfo(object) | |
382 | |
383 rt <- match.arg(rt) | |
384 | |
385 if (is.numeric(sampleidx)) | |
386 sampleidx <- sampnames(object)[sampleidx] | |
387 sampidx <- match(sampleidx, sampnames(object)) | |
388 | |
389 if (!missing(groupidx)) { | |
390 if (is.numeric(groupidx)) | |
391 groupidx <- groupnames(object)[unique(as.integer(groupidx))] | |
392 grpidx <- match(groupidx, groupnames(object, template = groupidx)) | |
393 } | |
394 | |
395 if (missing(mzrange)) { | |
396 if (missing(groupidx)) | |
397 stop("No m/z range or groups specified") | |
398 if (any(is.na(groupval(object, value = "mz")))) | |
399 warning( | |
400 "`NA` values in xcmsSet. Use fillPeaks() on the object to fill", | |
401 "-in missing peak values. Note however that this will also ", | |
402 "insert intensities of 0 for peaks that can not be filled in.") | |
403 mzmin <- apply(groupval(object, value = "mzmin"), 1, min, na.rm = TRUE) | |
404 mzmax <- apply(groupval(object, value = "mzmax"), 1, max, na.rm = TRUE) | |
405 mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2) | |
406 ## if (any(is.na(groupval(object, value = "mz")))) | |
407 ## stop('Please use fillPeaks() to fill up NA values !') | |
408 ## mzmin <- -rowMax(-groupval(object, value = "mzmin")) | |
409 ## mzmax <- rowMax(groupval(object, value = "mzmax")) | |
410 ## mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2) | |
411 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange))) | |
412 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE] | |
413 else if (is.null(dim(mzrange))) | |
414 stop("mzrange must be a matrix") | |
415 colnames(mzrange) <- c("mzmin", "mzmax") | |
416 | |
417 if (length(rtrange) == 1) { | |
418 if (missing(groupidx)) | |
419 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)), | |
420 ncol = 2, byrow = TRUE) | |
421 else { | |
422 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange) | |
423 } | |
424 } else if (is.null(dim(rtrange))) | |
425 stop("rtrange must be a matrix or single number") | |
426 colnames(rtrange) <- c("rtmin", "rtmax") | |
427 | |
428 ## Ensure that we've got corrected retention time if requested. | |
429 if (is.null(object@rt[[rt]])) | |
430 stop(rt, " retention times not present in 'object'!") | |
431 | |
432 ## Ensure that the defined retention time range is within the rtrange of the | |
433 ## object: we're using the max minimal rt of all files and the min maximal rt | |
434 rtrs <- lapply(object@rt[[rt]], range) | |
435 rtr <- c(max(unlist(lapply(rtrs, "[", 1))), | |
436 min(unlist(lapply(rtrs, "[", 2)))) | |
437 ## Check if we've got a range which is completely off: | |
438 if (any(rtrange[, "rtmin"] >= rtr[2] | rtrange[, "rtmax"] <= rtr[1])) { | |
439 outs <- which(rtrange[, "rtmin"] >= rtr[2] | | |
440 rtrange[, "rtmax"] <= rtr[1]) | |
441 stop(length(outs), " of the specified 'rtrange' are completely outside ", | |
442 "of the retention time range of 'object' which is (", rtr[1], ", ", | |
443 rtr[2], "). The first was: (", rtrange[outs[1], "rtmin"], ", ", | |
444 rtrange[outs[1], "rtmax"], "!") | |
445 } | |
446 lower_rt_outside <- rtrange[, "rtmin"] < rtr[1] | |
447 upper_rt_outside <- rtrange[, "rtmax"] > rtr[2] | |
448 if (any(lower_rt_outside) | any(upper_rt_outside)) { | |
449 ## Silently fix these ranges. | |
450 rtrange[lower_rt_outside, "rtmin"] <- rtr[1] | |
451 rtrange[upper_rt_outside, "rtmax"] <- rtr[2] | |
452 } | |
453 | |
454 if (missing(groupidx)) | |
455 gnames <- character(0) | |
456 else | |
457 gnames <- groupidx | |
458 | |
459 eic <- vector("list", length(sampleidx)) | |
460 names(eic) <- sampleidx | |
461 | |
462 for (i in seq(along = sampidx)) { | |
463 | |
464 ## cat(sampleidx[i], "") | |
465 flush.console() | |
466 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other | |
467 ## stuff. | |
468 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt) | |
469 currenteic <- xcms::getEIC(lcraw, mzrange, rtrange, step = prof$step) | |
470 eic[[i]] <- currenteic@eic[[1]] | |
471 rm(lcraw) | |
472 gc() | |
473 } | |
474 ## cat("\n") | |
475 | |
476 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange, | |
477 rt = rt, groupnames = gnames)) | |
478 } | |
479 | |
480 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
481 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524 | |
482 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd | |
483 ############################################################ | |
484 ## diffreport | |
485 diffreport = function(object, | |
486 class1 = levels(sampclass(object))[1], | |
487 class2 = levels(sampclass(object))[2], | |
488 filebase = character(), | |
489 eicmax = 0, eicwidth = 200, | |
490 sortpval = TRUE, | |
491 classeic = c(class1,class2), | |
492 value = c("into","maxo","intb"), | |
493 metlin = FALSE, | |
494 h = 480, w = 640, mzdec=2, | |
495 missing = numeric(), ...) { | |
496 | |
497 if ( nrow(object@groups)<1 || length(object@groupidx) <1) { | |
498 stop("No group information. Use group().") | |
499 } | |
500 | |
501 if (!is.numeric(w) || !is.numeric(h)) | |
502 stop("'h' and 'w' have to be numeric") | |
503 ## require(multtest) || stop("Couldn't load multtest") | |
504 | |
505 value <- match.arg(value) | |
506 groupmat <- groups(object) | |
507 if (length(groupmat) == 0) | |
508 stop("No group information found") | |
509 samples <- sampnames(object) | |
510 n <- length(samples) | |
511 classlabel <- sampclass(object) | |
512 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))] | |
513 | |
514 values <- groupval(object, "medret", value=value) | |
515 indecies <- groupval(object, "medret", value = "index") | |
516 | |
517 if (!all(c(class1,class2) %in% classlabel)) | |
518 stop("Incorrect Class Labels") | |
519 | |
520 ## c1 and c2 are column indices of class1 and class2 resp. | |
521 c1 <- which(classlabel %in% class1) | |
522 c2 <- which(classlabel %in% class2) | |
523 ceic <- which(classlabel %in% classeic) | |
524 if (length(intersect(c1, c2)) > 0) | |
525 stop("Intersecting Classes") | |
526 | |
527 ## Optionally replace NA values with the value provided with missing | |
528 if (length(missing)) { | |
529 if (is.numeric(missing)) { | |
530 ## handles NA, Inf and -Inf | |
531 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1] | |
532 } else | |
533 stop("'missing' should be numeric") | |
534 } | |
535 ## Check against missing Values | |
536 if (any(is.na(values[, c(c1, c2)]))) | |
537 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill", | |
538 "-in missing peak values. Note however that this will also ", | |
539 "insert intensities of 0 for peaks that can not be filled in.") | |
540 | |
541 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE) | |
542 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE) | |
543 | |
544 ## Calculate fold change. | |
545 ## For foldchange <1 set fold to 1/fold | |
546 ## See tstat to check which was higher | |
547 fold <- mean2 / mean1 | |
548 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1] | |
549 | |
550 testval <- values[,c(c1,c2)] | |
551 ## Replace eventual infinite values with NA (CAMERA issue #33) | |
552 testval[is.infinite(testval)] <- NA | |
553 testclab <- c(rep(0,length(c1)),rep(1,length(c2))) | |
554 | |
555 if (min(length(c1), length(c2)) >= 2) { | |
556 tstat <- mt.teststat(testval, testclab, ...) | |
557 pvalue <- xcms:::pval(testval, testclab, tstat) | |
558 } else { | |
559 message("Too few samples per class, skipping t-test.") | |
560 tstat <- pvalue <- rep(NA,nrow(testval)) | |
561 } | |
562 stat <- data.frame(fold = fold, tstat = tstat, pvalue = pvalue) | |
563 if (length(levels(sampclass(object))) >2) { | |
564 pvalAnova<-c() | |
565 for(i in 1:nrow(values)){ | |
566 var<-as.numeric(values[i,]) | |
567 ano<-summary(aov(var ~ sampclass(object)) ) | |
568 pvalAnova<-append(pvalAnova, unlist(ano)["Pr(>F)1"]) | |
569 } | |
570 stat<-cbind(stat, anova= pvalAnova) | |
571 } | |
572 if (metlin) { | |
573 neutralmass <- groupmat[,"mzmed"] + ifelse(metlin < 0, 1, -1) | |
574 metlin <- abs(metlin) | |
575 digits <- ceiling(-log10(metlin))+1 | |
576 metlinurl <- | |
577 paste("http://metlin.scripps.edu/simple_search_result.php?mass_min=", | |
578 round(neutralmass - metlin, digits), "&mass_max=", | |
579 round(neutralmass + metlin, digits), sep="") | |
580 values <- cbind(metlin = metlinurl, values) | |
581 } | |
582 twosamp <- cbind(name = groupnames(object), stat, groupmat, values) | |
583 if (sortpval) { | |
584 tsidx <- order(twosamp[,"pvalue"]) | |
585 twosamp <- twosamp[tsidx,] | |
586 rownames(twosamp) <- 1:nrow(twosamp) | |
587 values<-values[tsidx,] | |
588 } else | |
589 tsidx <- 1:nrow(values) | |
590 | |
591 if (length(filebase)) | |
592 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA) | |
593 | |
594 if (eicmax > 0) { | |
595 if (length(unique(peaks(object)[,"rt"])) > 1) { | |
596 ## This looks like "normal" LC data | |
597 | |
598 eicmax <- min(eicmax, length(tsidx)) | |
599 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic, | |
600 groupidx = tsidx[seq(length = eicmax)]) | |
601 | |
602 if (length(filebase)) { | |
603 eicdir <- paste(filebase, "_eic", sep="") | |
604 boxdir <- paste(filebase, "_box", sep="") | |
605 dir.create(eicdir) | |
606 dir.create(boxdir) | |
607 if (capabilities("png")){ | |
608 xcms:::xcmsBoxPlot(values[seq(length = eicmax),], | |
609 sampclass(object), dirpath=boxdir, pic="png", width=w, height=h) | |
610 png(file.path(eicdir, "%003d.png"), width = w, height = h) | |
611 } else { | |
612 xcms:::xcmsBoxPlot(values[seq(length = eicmax),], | |
613 sampclass(object), dirpath=boxdir, pic="pdf", width=w, height=h) | |
614 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, | |
615 height = h/72, onefile = FALSE) | |
616 } | |
617 } | |
618 plot(eics, object, rtrange = eicwidth, mzdec=mzdec) | |
619 | |
620 if (length(filebase)) | |
621 dev.off() | |
622 } else { | |
623 ## This looks like a direct-infusion single spectrum | |
624 if (length(filebase)) { | |
625 eicdir <- paste(filebase, "_eic", sep="") | |
626 boxdir <- paste(filebase, "_box", sep="") | |
627 dir.create(eicdir) | |
628 dir.create(boxdir) | |
629 if (capabilities("png")){ | |
630 xcmsBoxPlot(values[seq(length = eicmax),], | |
631 sampclass(object), dirpath=boxdir, pic="png", | |
632 width=w, height=h) | |
633 png(file.path(eicdir, "%003d.png"), width = w, height = h, | |
634 units = "px") | |
635 } else { | |
636 xcmsBoxPlot(values[seq(length = eicmax),], | |
637 sampclass(object), dirpath=boxdir, pic="pdf", | |
638 width=w, height=h) | |
639 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, | |
640 height = h/72, onefile = FALSE) | |
641 } | |
642 } | |
643 | |
644 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1) | |
645 | |
646 if (length(filebase)) | |
647 dev.off() | |
648 } | |
649 } | |
650 | |
651 invisible(twosamp) | |
652 } |