Mercurial > repos > yguitton > metams_rungc
diff lib_metams.r @ 0:2066efbafd7c draft
planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit 6384fcf4496a64affe0b8a173c3f7ea09a275ffb
author | yguitton |
---|---|
date | Wed, 13 Jul 2016 06:46:45 -0400 |
parents | |
children | c75532b75ba1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib_metams.r Wed Jul 13 06:46:45 2016 -0400 @@ -0,0 +1,493 @@ +# lib_metams.r version 0.99.6 +# R function for metaMS runGC under W4M +# author Yann GUITTON CNRS IRISA/LINA Idealg project 2014-2015 + + +##ADDITIONS FROM Y. Guitton +getBPC <- function(file,rtcor=NULL, ...) { + object <- xcmsRaw(file) + sel <- profRange(object, ...) + cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) + +} + +getBPC2s <- function (files, pdfname="BPCs.pdf", rt = c("raw","corrected"), scanrange=NULL) { + require(xcms) + + + + #create sampleMetadata, get sampleMetadata and class + sampleMetadata<-xcms:::phenoDataFromPaths(files) + class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table + classnames<-vector("list",length(class)) + for (i in 1:length(class)){ + classnames[[i]]<-which( sampleMetadata[,1]==class[i]) + } + + N <- dim(sampleMetadata)[1] + + + + TIC <- vector("list",N) + + + for (j in 1:N) { + + cat(files[j],"\n") + TIC[[j]] <- getBPC(files[j]) + #good for raw + # seems strange for corrected + #errors if scanrange used in xcmsSetgeneration + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[j]] else + rtcor <- NULL + TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + } + + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in BPCs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) + + + ##plot start + + if (length(class)>2){ + for (k in 1:(length(class)-1)){ + for (l in (k+1):length(class)){ + print(paste(class[k],"vs",class[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + if (length(class)==2){ + k=1 + l=2 + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==2 + if (length(class)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 + dev.off() + + # invisible(TIC) +} + +getTIC <- function(file,rtcor=NULL) { + object <- xcmsRaw(file) + cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) +} + +## +## overlay TIC from all files in current folder or from xcmsSet, create pdf +## +getTIC2s <- function(files, pdfname="TICs.pdf", rt=c("raw","corrected")) { + + #create sampleMetadata, get sampleMetadata and class + sampleMetadata<-xcms:::phenoDataFromPaths(files) + class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table + classnames<-vector("list",length(class)) + for (i in 1:length(class)){ + classnames[[i]]<-which( sampleMetadata[,1]==class[i]) + } + + N <- dim(sampleMetadata)[1] + TIC <- vector("list",N) + + for (i in 1:N) { + cat(files[i],"\n") + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[i]] + else + rtcor <- NULL + TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + } + + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in TICs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) + + + ##plot start + if (length(class)>2){ + for (k in 1:(length(class)-1)){ + for (l in (k+1):length(class)){ + print(paste(class[k],"vs",class[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + if (length(class)==2){ + + + k=1 + l=2 + + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==2 + if (length(class)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) + + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 + dev.off() + + # invisible(TIC) +} + + +##addition for quality control of peak picking +#metaMS EIC and pspectra plotting option +#version 20150512 +#only for Galaxy + +plotUnknowns<-function(resGC, unkn=""){ + + + ##Annotation table each value is a pcgrp associated to the unknown + ##NOTE pcgrp index are different between xcmsSet and resGC due to filtering steps in metaMS + ##R. Wehrens give me some clues on that and we found a correction + + mat<-matrix(ncol=length(resGC$xset), nrow=dim(resGC$PeakTable)[1]) + + for (j in 1: length(resGC$xset)){ + test<-resGC$annotation[[j]] + print(paste("j=",j)) + for (i in 1:dim(test)[1]){ + if (as.numeric(row.names(test)[i])>dim(mat)[1]){ + next + } else { + mat[as.numeric(row.names(test)[i]),j]<-test[i,1] + } + } + } + colnames(mat)<-colnames(resGC$PeakTable[,c((which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI")[length(which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI"))]+1):dim(resGC$PeakTable)[2])]) + + #debug + + # print(dim(mat)) + # print(mat[1:3,]) + # write.table(mat, file="myannotationtable.tsv", sep="\t", row.names=FALSE) + #correction of annotation matrix due to pcgrp removal by quality check in runGCresult + #matrix of correspondance between an@pspectra and filtered pspectra from runGC + + allPCGRPs <- + lapply(1:length(resGC$xset), + function(i) { + an <- resGC$xset[[i]] + huhn <- an@pspectra[which(sapply(an@pspectra, length) >= + metaSetting(resGC$settings, + "DBconstruction.minfeat"))] + matCORR<-cbind(1:length(huhn), match(huhn, an@pspectra)) + }) + + if (unkn[1]==""){ + #plot EIC and spectra for all unknown for comparative purpose + + + par (mar=c(5, 4, 4, 2) + 0.1) + for (l in 1:dim(resGC$PeakTable)[1]){ #l=2 + #recordPlot + perpage=3 #if change change layout also! + num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page + my.plots <- vector(num.plots, mode='list') + dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf + # par(mfrow=c(perpage,2)) + layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) + # layout.show(6) + oma.saved <- par("oma") + par(oma = rep.int(0, 4)) + par(oma = oma.saved) + o.par <- par(mar = rep.int(0, 4)) + on.exit(par(o.par)) + stop=0 #initialize + for (i in 1:num.plots) { + start=stop+1 + stop=start+perpage-1 # + for (c in start:stop){ + if (c <=dim(mat)[2]){ + + #get sample name + sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) + + #remove .cdf, .mzXML filepattern + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", + "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), + collapse = "|") + sampname<-gsub(filepattern, "",sampname) + + title1<-paste("unknown", l,"from",sampname, sep=" ") + an<-resGC$xset[[c]] + + par (mar=c(0, 0, 0, 0) + 0.1) + plot.new() + box() + text(0.5, 0.5, title1, cex=2) + if (!is.na(mat[l,c])){ + pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2] + if (pcgrp!=mat[l,c]) print ("pcgrp changed") + par (mar=c(3, 2.5, 3, 1.5) + 0.1) + plotEICs(an, pspec=pcgrp, maxlabel=2) + plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) + } else { + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + } + } + } + # my.plots[[i]] <- recordPlot() + } + graphics.off() + + # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE) + # for (my.plot in my.plots) { + # replayPlot(my.plot) + # } + # my.plots + # graphics.off() + + }#end for l + }#end if unkn="" + else{ + par (mar=c(5, 4, 4, 2) + 0.1) + l=unkn + if (length(l)==1){ + #recordPlot + perpage=3 #if change change layout also! + num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page + my.plots <- vector(num.plots, mode='list') + + dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf + # par(mfrow=c(perpage,2)) + layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) + # layout.show(6) + oma.saved <- par("oma") + par(oma = rep.int(0, 4)) + par(oma = oma.saved) + o.par <- par(mar = rep.int(0, 4)) + on.exit(par(o.par)) + stop=0 #initialize + for (i in 1:num.plots) { + start=stop+1 + stop=start+perpage-1 # + for (c in start:stop){ + if (c <=dim(mat)[2]){ + + #get sample name + sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) + + #remove .cdf, .mzXML filepattern + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") + sampname<-gsub(filepattern, "",sampname) + + title1<-paste("unknown", l,"from",sampname, sep=" ") + an<-resGC$xset[[c]] + + par (mar=c(0, 0, 0, 0) + 0.1) + plot.new() + box() + text(0.5, 0.5, title1, cex=2) + if (!is.na(mat[l,c])){ + pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2] + if (pcgrp!=mat[l,c]) print ("pcgrp changed") + par (mar=c(3, 2.5, 3, 1.5) + 0.1) + plotEICs(an, pspec=pcgrp, maxlabel=2) + plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) + } else { + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + } + } + } + # my.plots[[i]] <- recordPlot() + } + graphics.off() + + # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE) + # for (my.plot in my.plots) { + # replayPlot(my.plot) + # } + # my.plots + # graphics.off() + } else { + par (mar=c(5, 4, 4, 2) + 0.1) + for (l in 1:length(unkn)){ #l=2 + #recordPlot + perpage=3 #if change change layout also! + num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page + my.plots <- vector(num.plots, mode='list') + dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",unkn[l],".pdf", sep="")) #A4 pdf + # par(mfrow=c(perpage,2)) + layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage)) + # layout.show(6) + oma.saved <- par("oma") + par(oma = rep.int(0, 4)) + par(oma = oma.saved) + o.par <- par(mar = rep.int(0, 4)) + on.exit(par(o.par)) + stop=0 #initialize + for (i in 1:num.plots) { + start=stop+1 + stop=start+perpage-1 # + for (c in start:stop){ + if (c <=dim(mat)[2]){ + + #get sample name + sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths) + + #remove .cdf, .mzXML filepattern + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") + sampname<-gsub(filepattern, "",sampname) + + title1<-paste("unknown",unkn[l],"from",sampname, sep=" ") + an<-resGC$xset[[c]] + + par (mar=c(0, 0, 0, 0) + 0.1) + plot.new() + box() + text(0.5, 0.5, title1, cex=2) + if (!is.na(mat[unkn[l],c])){ + pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[unkn[l],c]),2] + if (pcgrp!=mat[unkn[l],c]) print ("pcgrp changed") + par (mar=c(3, 2.5, 3, 1.5) + 0.1) + plotEICs(an, pspec=pcgrp, maxlabel=2) + plotPsSpectrum(an, pspec=pcgrp, maxlabel=2) + } else { + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + plot.new() + box() + text(0.5, 0.5, "NOT FOUND", cex=2) + } + } + } + # my.plots[[i]] <- recordPlot() + } + graphics.off() + + # pdf(file=paste("Unknown_",unkn[l],".pdf", sep=""), onefile=TRUE) + # for (my.plot in my.plots) { + # replayPlot(my.plot) + # } + # my.plots + # graphics.off() + + }#end for unkn[l] + + } + + } +} #end function + + +