Mercurial > repos > yguitton > metams_rungc
view lib_metams.r @ 0:2066efbafd7c draft
planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit 6384fcf4496a64affe0b8a173c3f7ea09a275ffb
author | yguitton |
---|---|
date | Wed, 13 Jul 2016 06:46:45 -0400 |
parents | |
children | c75532b75ba1 |
line wrap: on
line source
# lib_metams.r version 0.99.6 # R function for metaMS runGC under W4M # author Yann GUITTON CNRS IRISA/LINA Idealg project 2014-2015 ##ADDITIONS FROM Y. Guitton getBPC <- function(file,rtcor=NULL, ...) { object <- xcmsRaw(file) sel <- profRange(object, ...) cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) } getBPC2s <- function (files, pdfname="BPCs.pdf", rt = c("raw","corrected"), scanrange=NULL) { require(xcms) #create sampleMetadata, get sampleMetadata and class sampleMetadata<-xcms:::phenoDataFromPaths(files) class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table classnames<-vector("list",length(class)) for (i in 1:length(class)){ classnames[[i]]<-which( sampleMetadata[,1]==class[i]) } N <- dim(sampleMetadata)[1] TIC <- vector("list",N) for (j in 1:N) { cat(files[j],"\n") TIC[[j]] <- getBPC(files[j]) #good for raw # seems strange for corrected #errors if scanrange used in xcmsSetgeneration if (!is.null(xcmsSet) && rt == "corrected") rtcor <- xcmsSet@rt$corrected[[j]] else rtcor <- NULL TIC[[j]] <- getBPC(files[j],rtcor=rtcor) } pdf(pdfname,w=16,h=10) cols <- rainbow(N) lty = 1:N pch = 1:N #search for max x and max y in BPCs xlim = range(sapply(TIC, function(x) range(x[,1]))) ylim = range(sapply(TIC, function(x) range(x[,2]))) ylim = c(-ylim[2], ylim[2]) ##plot start if (length(class)>2){ for (k in 1:(length(class)-1)){ for (l in (k+1):length(class)){ print(paste(class[k],"vs",class[l],sep=" ")) plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") colvect<-NULL for (j in 1:length(classnames[[k]])) { tic <- TIC[[classnames[[k]][j]]] # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { # i=class2names[j] tic <- TIC[[classnames[[l]][j]]] points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) } } }#end if length >2 if (length(class)==2){ k=1 l=2 colvect<-NULL plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") for (j in 1:length(classnames[[k]])) { tic <- TIC[[classnames[[k]][j]]] # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { # i=class2names[j] tic <- TIC[[classnames[[l]][j]]] points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) }#end length ==2 if (length(class)==1){ k=1 ylim = range(sapply(TIC, function(x) range(x[,2]))) colvect<-NULL plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") for (j in 1:length(classnames[[k]])) { tic <- TIC[[classnames[[k]][j]]] # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") colvect<-append(colvect,cols[classnames[[k]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) }#end length ==1 dev.off() # invisible(TIC) } getTIC <- function(file,rtcor=NULL) { object <- xcmsRaw(file) cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) } ## ## overlay TIC from all files in current folder or from xcmsSet, create pdf ## getTIC2s <- function(files, pdfname="TICs.pdf", rt=c("raw","corrected")) { #create sampleMetadata, get sampleMetadata and class sampleMetadata<-xcms:::phenoDataFromPaths(files) class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table classnames<-vector("list",length(class)) for (i in 1:length(class)){ classnames[[i]]<-which( sampleMetadata[,1]==class[i]) } N <- dim(sampleMetadata)[1] TIC <- vector("list",N) for (i in 1:N) { cat(files[i],"\n") if (!is.null(xcmsSet) && rt == "corrected") rtcor <- xcmsSet@rt$corrected[[i]] else rtcor <- NULL TIC[[i]] <- getTIC(files[i],rtcor=rtcor) } pdf(pdfname,w=16,h=10) cols <- rainbow(N) lty = 1:N pch = 1:N #search for max x and max y in TICs xlim = range(sapply(TIC, function(x) range(x[,1]))) ylim = range(sapply(TIC, function(x) range(x[,2]))) ylim = c(-ylim[2], ylim[2]) ##plot start if (length(class)>2){ for (k in 1:(length(class)-1)){ for (l in (k+1):length(class)){ print(paste(class[k],"vs",class[l],sep=" ")) plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") colvect<-NULL for (j in 1:length(classnames[[k]])) { tic <- TIC[[classnames[[k]][j]]] # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { # i=class2names[j] tic <- TIC[[classnames[[l]][j]]] points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) } } }#end if length >2 if (length(class)==2){ k=1 l=2 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") colvect<-NULL for (j in 1:length(classnames[[k]])) { tic <- TIC[[classnames[[k]][j]]] # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { # i=class2names[j] tic <- TIC[[classnames[[l]][j]]] points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) }#end length ==2 if (length(class)==1){ k=1 ylim = range(sapply(TIC, function(x) range(x[,2]))) plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") colvect<-NULL for (j in 1:length(classnames[[k]])) { tic <- TIC[[classnames[[k]][j]]] # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") colvect<-append(colvect,cols[classnames[[k]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) }#end length ==1 dev.off() # invisible(TIC) } ##addition for quality control of peak picking #metaMS EIC and pspectra plotting option #version 20150512 #only for Galaxy plotUnknowns<-function(resGC, unkn=""){ ##Annotation table each value is a pcgrp associated to the unknown ##NOTE pcgrp index are different between xcmsSet and resGC due to filtering steps in metaMS ##R. Wehrens give me some clues on that and we found a correction mat<-matrix(ncol=length(resGC$xset), nrow=dim(resGC$PeakTable)[1]) for (j in 1: length(resGC$xset)){ test<-resGC$annotation[[j]] print(paste("j=",j)) for (i in 1:dim(test)[1]){ if (as.numeric(row.names(test)[i])>dim(mat)[1]){ next } else { mat[as.numeric(row.names(test)[i]),j]<-test[i,1] } } } colnames(mat)<-colnames(resGC$PeakTable[,c((which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI")[length(which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI"))]+1):dim(resGC$PeakTable)[2])]) #debug # print(dim(mat)) # print(mat[1:3,]) # write.table(mat, file="myannotationtable.tsv", sep="\t", row.names=FALSE) #correction of annotation matrix due to pcgrp removal by quality check in runGCresult #matrix of correspondance between an@pspectra and filtered pspectra from runGC allPCGRPs <- lapply(1:length(resGC$xset), function(i) { an <- resGC$xset[[i]] huhn <- an@pspectra[which(sapply(an@pspectra, length) >= metaSetting(resGC$settings, "DBconstruction.minfeat"))] matCORR<-cbind(1:length(huhn), match(huhn, an@pspectra)) }) if (unkn[1]==""){ #plot EIC and spectra for all unknown for comparative purpose par (mar=c(5, 4, 4, 2) + 0.1) for (l in 1:dim(resGC$PeakTable)[1]){ #l=2 #recordPlot perpage=3 #if change change layout also! num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page my.plots <- vector(num.plots, mode='list') dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf # par(mfrow=c(perpage,2)) layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) # layout.show(6) oma.saved <- par("oma") par(oma = rep.int(0, 4)) par(oma = oma.saved) o.par <- par(mar = rep.int(0, 4)) on.exit(par(o.par)) stop=0 #initialize for (i in 1:num.plots) { start=stop+1 stop=start+perpage-1 # for (c in start:stop){ if (c <=dim(mat)[2]){ #get sample name sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) #remove .cdf, .mzXML filepattern filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") sampname<-gsub(filepattern, "",sampname) title1<-paste("unknown", l,"from",sampname, sep=" ") an<-resGC$xset[[c]] par (mar=c(0, 0, 0, 0) + 0.1) plot.new() box() text(0.5, 0.5, title1, cex=2) if (!is.na(mat[l,c])){ pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2] if (pcgrp!=mat[l,c]) print ("pcgrp changed") par (mar=c(3, 2.5, 3, 1.5) + 0.1) plotEICs(an, pspec=pcgrp, maxlabel=2) plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) } else { plot.new() box() text(0.5, 0.5, "NOT FOUND", cex=2) plot.new() box() text(0.5, 0.5, "NOT FOUND", cex=2) } } } # my.plots[[i]] <- recordPlot() } graphics.off() # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE) # for (my.plot in my.plots) { # replayPlot(my.plot) # } # my.plots # graphics.off() }#end for l }#end if unkn="" else{ par (mar=c(5, 4, 4, 2) + 0.1) l=unkn if (length(l)==1){ #recordPlot perpage=3 #if change change layout also! num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page my.plots <- vector(num.plots, mode='list') dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf # par(mfrow=c(perpage,2)) layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) # layout.show(6) oma.saved <- par("oma") par(oma = rep.int(0, 4)) par(oma = oma.saved) o.par <- par(mar = rep.int(0, 4)) on.exit(par(o.par)) stop=0 #initialize for (i in 1:num.plots) { start=stop+1 stop=start+perpage-1 # for (c in start:stop){ if (c <=dim(mat)[2]){ #get sample name sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) #remove .cdf, .mzXML filepattern filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") sampname<-gsub(filepattern, "",sampname) title1<-paste("unknown", l,"from",sampname, sep=" ") an<-resGC$xset[[c]] par (mar=c(0, 0, 0, 0) + 0.1) plot.new() box() text(0.5, 0.5, title1, cex=2) if (!is.na(mat[l,c])){ pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2] if (pcgrp!=mat[l,c]) print ("pcgrp changed") par (mar=c(3, 2.5, 3, 1.5) + 0.1) plotEICs(an, pspec=pcgrp, maxlabel=2) plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) } else { plot.new() box() text(0.5, 0.5, "NOT FOUND", cex=2) plot.new() box() text(0.5, 0.5, "NOT FOUND", cex=2) } } } # my.plots[[i]] <- recordPlot() } graphics.off() # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE) # for (my.plot in my.plots) { # replayPlot(my.plot) # } # my.plots # graphics.off() } else { par (mar=c(5, 4, 4, 2) + 0.1) for (l in 1:length(unkn)){ #l=2 #recordPlot perpage=3 #if change change layout also! num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page my.plots <- vector(num.plots, mode='list') dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",unkn[l],".pdf", sep="")) #A4 pdf # par(mfrow=c(perpage,2)) layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) # layout.show(6) oma.saved <- par("oma") par(oma = rep.int(0, 4)) par(oma = oma.saved) o.par <- par(mar = rep.int(0, 4)) on.exit(par(o.par)) stop=0 #initialize for (i in 1:num.plots) { start=stop+1 stop=start+perpage-1 # for (c in start:stop){ if (c <=dim(mat)[2]){ #get sample name sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) #remove .cdf, .mzXML filepattern filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") sampname<-gsub(filepattern, "",sampname) title1<-paste("unknown",unkn[l],"from",sampname, sep=" ") an<-resGC$xset[[c]] par (mar=c(0, 0, 0, 0) + 0.1) plot.new() box() text(0.5, 0.5, title1, cex=2) if (!is.na(mat[unkn[l],c])){ pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[unkn[l],c]),2] if (pcgrp!=mat[unkn[l],c]) print ("pcgrp changed") par (mar=c(3, 2.5, 3, 1.5) + 0.1) plotEICs(an, pspec=pcgrp, maxlabel=2) plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) } else { plot.new() box() text(0.5, 0.5, "NOT FOUND", cex=2) plot.new() box() text(0.5, 0.5, "NOT FOUND", cex=2) } } } # my.plots[[i]] <- recordPlot() } graphics.off() # pdf(file=paste("Unknown_",unkn[l],".pdf", sep=""), onefile=TRUE) # for (my.plot in my.plots) { # replayPlot(my.plot) # } # my.plots # graphics.off() }#end for unkn[l] } } } #end function