Mercurial > repos > yguitton > metams_rungc
diff lib_metams.r @ 4:c10824185547 draft
planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit 174a1713024f246c1485cbd75218577e89353522
author | workflow4metabolomics |
---|---|
date | Wed, 03 Jul 2019 05:14:32 -0400 |
parents | c75532b75ba1 |
children | b8d4129dd2a6 |
line wrap: on
line diff
--- a/lib_metams.r Thu Jun 08 11:53:35 2017 -0400 +++ b/lib_metams.r Wed Jul 03 05:14:32 2019 -0400 @@ -1,504 +1,87 @@ -# lib_metams.r version 2.0.0 +# lib_metams.r version 2.1.1 # R function for metaMS runGC under W4M # author Yann GUITTON CNRS IRISA/LINA Idealg project 2014-2015 # author Yann GUITTON Oniris Laberca 2015-2017 -##ADDITIONS FROM Y. Guitton -getBPC <- function(file,rtcor=NULL, ...) { - object <- xcmsRaw(file) - sel <- profRange(object, ...) - cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) - +#@author G. Le Corguille +# This function will +# - load the packages +# - display the sessionInfo +loadAndDisplayPackages <- function(pkgs) { + for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) + + sessioninfo = sessionInfo() + cat(sessioninfo$R.version$version.string,"\n") + cat("Main packages:\n") + for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") + cat("Other loaded packages:\n") + for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") +} + +#This function list the compatible files within the directory as xcms did +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM +getMSFiles <- function (directory) { + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) + files <- c(directory[!info$isdir], listed) + exists <- file.exists(files) + files <- files[exists] + return(files) } -getBPC2s <- function (files, pdfname="BPCs.pdf", rt = c("raw","corrected"), scanrange=NULL) { - require(xcms) - - - - #create sampleMetadata, get sampleMetadata and class - sampleMetadata<-xcms:::phenoDataFromPaths(files) - class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table - classnames<-vector("list",length(class)) - for (i in 1:length(class)){ - classnames[[i]]<-which( sampleMetadata[,1]==class[i]) - } - - N <- dim(sampleMetadata)[1] - - - - TIC <- vector("list",N) - - - for (j in 1:N) { - - cat(files[j],"\n") - TIC[[j]] <- getBPC(files[j]) - #good for raw - # seems strange for corrected - #errors if scanrange used in xcmsSetgeneration - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[j]] else - rtcor <- NULL - TIC[[j]] <- getBPC(files[j],rtcor=rtcor) +# This function retrieve a xset like object +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr +getxcmsSetObject <- function(xobject) { + # XCMS 1.x + if (class(xobject) == "xcmsSet") + return (xobject) + # XCMS 3.x + if (class(xobject) == "XCMSnExp") { + # Get the legacy xcmsSet object + suppressWarnings(xset <- as(xobject, 'xcmsSet')) + if (!is.null(xset@phenoData$sample_group)) + sampclass(xset) <- xset@phenoData$sample_group + else + sampclass(xset) <- "." + return (xset) } - - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty = 1:N - pch = 1:N - #search for max x and max y in BPCs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) - ylim = c(-ylim[2], ylim[2]) - - - ##plot start - - if (length(class)>2){ - for (k in 1:(length(class)-1)){ - for (l in (k+1):length(class)){ - print(paste(class[k],"vs",class[l],sep=" ")) - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - } - } - }#end if length >2 - if (length(class)==2){ - k=1 - l=2 - colvect<-NULL - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - - for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==2 - if (length(class)==1){ - k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) - colvect<-NULL - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - - for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - - legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==1 - dev.off() - - # invisible(TIC) } -getTIC <- function(file,rtcor=NULL) { - object <- xcmsRaw(file) - cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) -} - -## -## overlay TIC from all files in current folder or from xcmsSet, create pdf -## -getTIC2s <- function(files, pdfname="TICs.pdf", rt=c("raw","corrected")) { - - #create sampleMetadata, get sampleMetadata and class - sampleMetadata<-xcms:::phenoDataFromPaths(files) - class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table - classnames<-vector("list",length(class)) - for (i in 1:length(class)){ - classnames[[i]]<-which( sampleMetadata[,1]==class[i]) - } - - N <- dim(sampleMetadata)[1] - TIC <- vector("list",N) +#J.Saint-Vanne +#Function to correct the file names which can be written like "..alg8.mzData" and we just want "alg8" +getCorrectFileName <- function(peaktable,sampleMetadata){ - for (i in 1:N) { - cat(files[i],"\n") - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[i]] - else - rtcor <- NULL - TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + #Try to start for the first sample, avoid description of line with colnamesdontwant + i <- 1 + while(!(sampleMetadata[1,1] %in% strsplit(colnames(peaktable)[i],"\\.")[[1]])) { + if(i < length(peaktable)) { + i <- i + 1 + } else { + break + } } - - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty = 1:N - pch = 1:N - #search for max x and max y in TICs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) - ylim = c(-ylim[2], ylim[2]) - - - ##plot start - if (length(class)>2){ - for (k in 1:(length(class)-1)){ - for (l in (k+1):length(class)){ - print(paste(class[k],"vs",class[l],sep=" ")) - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) + #i now correspond to the first column with a sample + for(j in 1:(nrow(sampleMetadata))) { + col <- j + i-1 #minus 1 cause i is the good column to start and j start at 1 + if(col <= length(colnames(peaktable))) { + newname <- gsub("(^.*)(\\..*$)","\\1",colnames(peaktable)[col]) + if(newname != sampleMetadata[j,1]){ + #Correction for 2 points starting the name (I don't know why they are here...) + if(".." == gsub("(^\\.+)(.*)","\\1",newname)){ + newname <- sub("(^\\.+)(.*)","\\2",newname) } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) } - } - }#end if length >2 - if (length(class)==2){ - - - k=1 - l=2 - - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) + colnames(peaktable)[col] <- newname } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==2 - if (length(class)==1){ - k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) - - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - - legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==1 - dev.off() - - # invisible(TIC) + } + return(peaktable) } -##addition for quality control of peak picking -#metaMS EIC and pspectra plotting option -#version 20150512 -#only for Galaxy - -plotUnknowns<-function(resGC, unkn=""){ - - - ##Annotation table each value is a pcgrp associated to the unknown - ##NOTE pcgrp index are different between xcmsSet and resGC due to filtering steps in metaMS - ##R. Wehrens give me some clues on that and we found a correction - #if unkn="none" - - if(unkn=="none") { - pdf("Unknown_Empty.pdf") - plot.new() - text(x=0.5,y=1,pos=1, labels="No EIC ploting required") - dev.off() - }else { - - mat<-matrix(ncol=length(resGC$xset), nrow=dim(resGC$PeakTable)[1]) - - for (j in 1: length(resGC$xset)){ - test<-resGC$annotation[[j]] - print(paste("j=",j)) - for (i in 1:dim(test)[1]){ - if (as.numeric(row.names(test)[i])>dim(mat)[1]){ - next - } else { - mat[as.numeric(row.names(test)[i]),j]<-test[i,1] - } - } - } - colnames(mat)<-colnames(resGC$PeakTable[,c((which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI")[length(which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI"))]+1):dim(resGC$PeakTable)[2])]) - - #debug - - # print(dim(mat)) - # print(mat[1:3,]) - # write.table(mat, file="myannotationtable.tsv", sep="\t", row.names=FALSE) - #correction of annotation matrix due to pcgrp removal by quality check in runGCresult - #matrix of correspondance between an@pspectra and filtered pspectra from runGC - - allPCGRPs <- - lapply(1:length(resGC$xset), - function(i) { - an <- resGC$xset[[i]] - huhn <- an@pspectra[which(sapply(an@pspectra, length) >= - metaSetting(resGC$settings, - "DBconstruction.minfeat"))] - matCORR<-cbind(1:length(huhn), match(huhn, an@pspectra)) - }) - - if (unkn[1]==""){ - #plot EIC and spectra for all unknown for comparative purpose - - - par (mar=c(5, 4, 4, 2) + 0.1) - for (l in 1:dim(resGC$PeakTable)[1]){ #l=2 - #recordPlot - perpage=3 #if change change layout also! - num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page - my.plots <- vector(num.plots, mode='list') - dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf - # par(mfrow=c(perpage,2)) - layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) - # layout.show(6) - oma.saved <- par("oma") - par(oma = rep.int(0, 4)) - par(oma = oma.saved) - o.par <- par(mar = rep.int(0, 4)) - on.exit(par(o.par)) - stop=0 #initialize - for (i in 1:num.plots) { - start=stop+1 - stop=start+perpage-1 # - for (c in start:stop){ - if (c <=dim(mat)[2]){ - - #get sample name - sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) - - #remove .cdf, .mzXML filepattern - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", - "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), - collapse = "|") - sampname<-gsub(filepattern, "",sampname) - - title1<-paste("unknown", l,"from",sampname, sep=" ") - an<-resGC$xset[[c]] - - par (mar=c(0, 0, 0, 0) + 0.1) - plot.new() - box() - text(0.5, 0.5, title1, cex=2) - if (!is.na(mat[l,c])){ - pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2] - if (pcgrp!=mat[l,c]) print ("pcgrp changed") - par (mar=c(3, 2.5, 3, 1.5) + 0.1) - plotEICs(an, pspec=pcgrp, maxlabel=2) - plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) - } else { - plot.new() - box() - text(0.5, 0.5, "NOT FOUND", cex=2) - plot.new() - box() - text(0.5, 0.5, "NOT FOUND", cex=2) - } - } - } - # my.plots[[i]] <- recordPlot() - } - graphics.off() - - # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE) - # for (my.plot in my.plots) { - # replayPlot(my.plot) - # } - # my.plots - # graphics.off() - - }#end for l - }#end if unkn="" - else{ - par (mar=c(5, 4, 4, 2) + 0.1) - l=unkn - if (length(l)==1){ - #recordPlot - perpage=3 #if change change layout also! - num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page - my.plots <- vector(num.plots, mode='list') - - dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf - # par(mfrow=c(perpage,2)) - layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) - # layout.show(6) - oma.saved <- par("oma") - par(oma = rep.int(0, 4)) - par(oma = oma.saved) - o.par <- par(mar = rep.int(0, 4)) - on.exit(par(o.par)) - stop=0 #initialize - for (i in 1:num.plots) { - start=stop+1 - stop=start+perpage-1 # - for (c in start:stop){ - if (c <=dim(mat)[2]){ - - #get sample name - sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) - - #remove .cdf, .mzXML filepattern - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") - sampname<-gsub(filepattern, "",sampname) - - title1<-paste("unknown", l,"from",sampname, sep=" ") - an<-resGC$xset[[c]] - - par (mar=c(0, 0, 0, 0) + 0.1) - plot.new() - box() - text(0.5, 0.5, title1, cex=2) - if (!is.na(mat[l,c])){ - pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2] - if (pcgrp!=mat[l,c]) print ("pcgrp changed") - par (mar=c(3, 2.5, 3, 1.5) + 0.1) - plotEICs(an, pspec=pcgrp, maxlabel=2) - plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) - } else { - plot.new() - box() - text(0.5, 0.5, "NOT FOUND", cex=2) - plot.new() - box() - text(0.5, 0.5, "NOT FOUND", cex=2) - } - } - } - # my.plots[[i]] <- recordPlot() - } - graphics.off() - - # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE) - # for (my.plot in my.plots) { - # replayPlot(my.plot) - # } - # my.plots - # graphics.off() - } else { - par (mar=c(5, 4, 4, 2) + 0.1) - for (l in 1:length(unkn)){ #l=2 - #recordPlot - perpage=3 #if change change layout also! - num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page - my.plots <- vector(num.plots, mode='list') - dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",unkn[l],".pdf", sep="")) #A4 pdf - # par(mfrow=c(perpage,2)) - layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) - # layout.show(6) - oma.saved <- par("oma") - par(oma = rep.int(0, 4)) - par(oma = oma.saved) - o.par <- par(mar = rep.int(0, 4)) - on.exit(par(o.par)) - stop=0 #initialize - for (i in 1:num.plots) { - start=stop+1 - stop=start+perpage-1 # - for (c in start:stop){ - if (c <=dim(mat)[2]){ - - #get sample name - sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) - - #remove .cdf, .mzXML filepattern - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") - sampname<-gsub(filepattern, "",sampname) - - title1<-paste("unknown",unkn[l],"from",sampname, sep=" ") - an<-resGC$xset[[c]] - - par (mar=c(0, 0, 0, 0) + 0.1) - plot.new() - box() - text(0.5, 0.5, title1, cex=2) - if (!is.na(mat[unkn[l],c])){ - pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[unkn[l],c]),2] - if (pcgrp!=mat[unkn[l],c]) print ("pcgrp changed") - par (mar=c(3, 2.5, 3, 1.5) + 0.1) - plotEICs(an, pspec=pcgrp, maxlabel=2) - plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) - } else { - plot.new() - box() - text(0.5, 0.5, "NOT FOUND", cex=2) - plot.new() - box() - text(0.5, 0.5, "NOT FOUND", cex=2) - } - } - } - # my.plots[[i]] <- recordPlot() - } - graphics.off() - - # pdf(file=paste("Unknown_",unkn[l],".pdf", sep=""), onefile=TRUE) - # for (my.plot in my.plots) { - # replayPlot(my.plot) - # } - # my.plots - # graphics.off() - - }#end for unkn[l] - - } - - } - } -} #end function - +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr # This function get the raw file path from the arguments getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) { if (!is.null(listArguments[["zipfile"]])) zipfile = listArguments[["zipfile"]] @@ -535,6 +118,7 @@ } +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr # This function retrieve the raw file in the working directory # - if zipfile: unzip the file with its directory tree # - if singlefiles: set symlink with the good filename @@ -546,11 +130,9 @@ error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") print(error_message); stop(error_message) } - file.symlink(singlefile_galaxyPath,singlefile_sampleName) } directory = "." - } if(!is.null(zipfile) && (zipfile!="")) { if(!file.exists(zipfile)){ @@ -573,8 +155,349 @@ if (length(directories) == 1) directory = directories cat("files_root_directory\t",directory,"\n") - } return (directory) } +##ADDITIONS FROM Y. Guitton +getBPC <- function(file,rtcor=NULL, ...) { + object <- xcmsRaw(file) + sel <- profRange(object, ...) + cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) +} + +getBPC2s <- function (files, xset = NULL, pdfname="BPCs.pdf", rt = c("raw","corrected"), scanrange=NULL) { + require(xcms) + + #create sampleMetadata, get sampleMetadata and class + if(!is.null(xset)) { + #When files come from XCMS3 directly before metaMS + sampleMetadata <- xset@phenoData + } else { + #When files come from a zip directory with raw files before metaMS + sampleMetadata<-xcms:::phenoDataFromPaths(files) + } + class<-unique(sampleMetadata[,"class"]) #create phenoData like table + classnames<-vector("list",length(class)) + for (i in 1:length(class)){ + classnames[[i]]<-which( sampleMetadata[,"class"]==class[i]) + } + + N <- dim(sampleMetadata)[1] + TIC <- vector("list",N) + + for (j in 1:N) { + TIC[[j]] <- getBPC(files[j]) + #good for raw + # seems strange for corrected + #errors if scanrange used in xcmsSetgeneration + if (!is.null(xcmsSet) && rt == "corrected"){ + rtcor <- xcmsSet@rt$corrected[[j]] + }else{ + rtcor <- NULL + } + TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + } + + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in BPCs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) + + ##plot start + if (length(class)>2){ + for (k in 1:(length(class)-1)){ + for (l in (k+1):length(class)){ + cat(paste(class[k],"vs",class[l],sep=" ","\n")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(gsub("(^.+)\\..*$","\\1",basename(files[c(classnames[[k]],classnames[[l]])]))), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + + if (length(class)==2){ + k=1 + l=2 + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(gsub("(^.+)\\..*$","\\1",basename(files[c(classnames[[k]],classnames[[l]])]))), col = colvect, lty = lty, pch = pch) + }#end length ==2 + + if (length(class)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + legend("topright",paste(gsub("(^.+)\\..*$","\\1",basename(files[c(classnames[[k]])]))), col = colvect, lty = lty, pch = pch) + }#end length ==1 + dev.off() +} + +getTIC <- function(file,rtcor=NULL) { + object <- xcmsRaw(file) + cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) +} + +## overlay TIC from all files in current folder or from xcmsSet, create pdf +getTIC2s <- function(files, xset=NULL, pdfname="TICs.pdf", rt=c("raw","corrected")) { + require(xcms) + + #create sampleMetadata, get sampleMetadata and class + if(!is.null(xset)){ + #When files come from XCMS3 before metaMS treatment + sampleMetadata<-xset@phenoData + } else { + #When files come from a zip directory with raw files before metaMS + sampleMetadata<-xcms:::phenoDataFromPaths(files) + } + class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table + classnames<-vector("list",length(class)) + for (i in 1:length(class)){ + classnames[[i]]<-which( sampleMetadata[,"class"]==class[i]) + } + + N <- dim(sampleMetadata)[1] + TIC <- vector("list",N) + + for (i in 1:N) { + cat(files[i],"\n") + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[i]] + else + rtcor <- NULL + TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + } + + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in TICs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) + + ##plot start + if (length(class)>2){ + for (k in 1:(length(class)-1)){ + for (l in (k+1):length(class)){ + print(paste(class[k],"vs",class[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(gsub("(^.+)\\..*$","\\1",basename(files[c(classnames[[k]],classnames[[l]])]))), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + + if (length(class)==2){ + k=1 + l=2 + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(gsub("(^.+)\\..*$","\\1",basename(files[c(classnames[[k]],classnames[[l]])]))), col = colvect, lty = lty, pch = pch) + }#end length ==2 + + if (length(class)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + legend("topright",paste(gsub("(^.+)\\..*$","\\1",basename(files[c(classnames[[k]])]))), col = colvect, lty = lty, pch = pch) + }#end length ==1 + dev.off() +} + + +#Update by J.Saint-Vanne +##addition for quality control of peak picking +#metaMS EIC and pspectra plotting option +#version 20190520 +#only for Galaxy +plotUnknowns<-function(resGC, unkn="", DB=NULL, fileFrom=NULL){ + + ##Annotation table each value is a pcgrp associated to the unknown + ##NOTE pcgrp index are different between xcmsSet and resGC due to filtering steps in metaMS + ##R. Wehrens give me some clues on that and we found a correction + + #correction of annotation matrix due to pcgrp removal by quality check in runGCresult + #matrix of correspondance between an@pspectra and filtered pspectra from runGC + #Select only pspectra which correpond to them select in metaMS + # col1 = filtered spectra from runGC and col2 = an@spectra + allPCGRPs <-lapply(1:length(resGC$xset), + function(i) { + an <- resGC$xset[[i]] + huhn <- an@pspectra[which(sapply(an@pspectra, length) >= + metaSetting(resGC$settings, + "DBconstruction.minfeat"))] + matCORR<-cbind(1:length(huhn), match(huhn, an@pspectra)) + }) + #Build a new annotation list with sampnames and pseudospectra number from xset + helpannotation <- list() + for(j in 1:length(resGC$xset)){ + helpannotation[[j]] <- resGC$annotation[[j]][1:2] + pspvector <- vector() + for(i in 1: nrow(helpannotation[[j]])){ + #Find corresponding pspec + psplink <- allPCGRPs[[j]][match(helpannotation[[j]][i,1],allPCGRPs[[j]]),2] + pspvector <- c(pspvector,psplink) + #Change the annotation column into sampname column + if(helpannotation[[j]][i,2] < 0){ + #It's an unknown + new_name <- paste("Unknown",abs(as.integer(helpannotation[[j]][i,2]))) + helpannotation[[j]][i,2] <- new_name + }else{ + #It has been found in local database + for(k in 1:length(DB)){ + if(helpannotation[[j]][i,2] == k){ + helpannotation[[j]][i,2] <- DB[[k]]$Name + break + } + } + } + } + helpannotation[[j]] <- cbind(helpannotation[[j]],pspvector) + names(helpannotation)[j] <- names(resGC$annotation[j]) + } + peaktable <- resGC$PeakTable + + par (mar=c(5, 4, 4, 2) + 0.1) + #For each unknown + for (l in 1:length(unkn)){ + #recordPlot + perpage=3 #if change change layout also! + dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",unkn[l],".pdf", sep="")) #A4 pdf + # par(mfrow=c(perpage,2)) + layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) + # layout.show(6) + oma.saved <- par("oma") + par(oma = rep.int(0, 4)) + par(oma = oma.saved) + o.par <- par(mar = rep.int(0, 4)) + on.exit(par(o.par)) + #For each sample + for (c in 1:length(resGC$xset)) { + #get sample name + sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) + #remove .cdf, .mzXML filepattern + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", + "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") + sampname<-gsub(filepattern, "",sampname) + title1<-paste(peaktable[unkn[l],1],"from",sampname, sep = " ") + an<-resGC$xset[[c]] + if(fileFrom == "zipfile") { + an@xcmsSet@filepaths <- paste0("./",an@xcmsSet@phenoData[,"class"],"/",basename(an@xcmsSet@filepaths)) + }#else { + #print(an@xcmsSet@filepaths) + #an@xcmsSet@filepaths <- paste0("./",basename(an@xcmsSet@filepaths)) + #} + #Find the good annotation for this sample + for(a in 1:length(helpannotation)){ + if(gsub(filepattern, "", names(helpannotation)[a]) == paste0("./",sampname)){ + #Find the unkn or the matched std in this sample + findunkn <- FALSE + for(r in 1:nrow(helpannotation[[a]])){ + if(helpannotation[[a]][r,"annotation"] == peaktable[unkn[l],1]){ + findunkn <- TRUE + pcgrp <- helpannotation[[a]][r,"pspvector"] + par (mar=c(0, 0, 0, 0) + 0.1) + #Write title + plot.new() + box() + text(0.5, 0.5, title1, cex=2) + par (mar=c(3, 2.5, 3, 1.5) + 0.1) + #Window for EIC + plotEICs(an, pspec=pcgrp, maxlabel=2) + #Window for pseudospectra + plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) + } + } + if(!findunkn) { + par (mar=c(0, 0, 0, 0) + 0.1) + #Write title + plot.new() + box() + text(0.5, 0.5, title1, cex=2) + #Window for EIC + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + #Window for pseudospectra + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + } + break + } + } + } + graphics.off() + }#end for unkn[l] +}#end function \ No newline at end of file