diff lib_metams.r @ 0:2066efbafd7c draft

planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit 6384fcf4496a64affe0b8a173c3f7ea09a275ffb
author yguitton
date Wed, 13 Jul 2016 06:46:45 -0400
parents
children c75532b75ba1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib_metams.r	Wed Jul 13 06:46:45 2016 -0400
@@ -0,0 +1,493 @@
+# lib_metams.r version 0.99.6
+# R function for metaMS runGC under W4M
+# author Yann GUITTON CNRS IRISA/LINA Idealg project 2014-2015
+
+
+##ADDITIONS FROM Y. Guitton
+getBPC <- function(file,rtcor=NULL, ...) {
+     object <- xcmsRaw(file)
+	 sel <- profRange(object, ...)
+	 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE]))
+    
+}
+
+getBPC2s <- function (files, pdfname="BPCs.pdf", rt = c("raw","corrected"), scanrange=NULL) {
+    require(xcms)
+
+        
+                            
+    #create sampleMetadata, get sampleMetadata and class
+    sampleMetadata<-xcms:::phenoDataFromPaths(files)
+    class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table
+    classnames<-vector("list",length(class))
+    for (i in 1:length(class)){
+        classnames[[i]]<-which( sampleMetadata[,1]==class[i])
+    }
+        
+    N <- dim(sampleMetadata)[1]
+      
+  
+   
+    TIC <- vector("list",N)
+
+
+    for (j in 1:N) {
+
+        cat(files[j],"\n")
+        TIC[[j]] <- getBPC(files[j])
+        #good for raw 
+        # seems strange for corrected
+        #errors if scanrange used in xcmsSetgeneration
+        if (!is.null(xcmsSet) && rt == "corrected")
+          rtcor <- xcmsSet@rt$corrected[[j]] else
+          rtcor <- NULL
+          TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
+    }
+
+    pdf(pdfname,w=16,h=10)
+    cols <- rainbow(N)
+    lty = 1:N
+    pch = 1:N
+    #search for max x and max y in BPCs
+    xlim = range(sapply(TIC, function(x) range(x[,1])))
+    ylim = range(sapply(TIC, function(x) range(x[,2])))
+    ylim = c(-ylim[2], ylim[2])
+
+
+    ##plot start
+    
+    if (length(class)>2){
+      for (k in 1:(length(class)-1)){
+        for (l in (k+1):length(class)){
+            print(paste(class[k],"vs",class[l],sep=" ")) 
+            plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
+            colvect<-NULL
+           for (j in 1:length(classnames[[k]])) {
+      
+              tic <- TIC[[classnames[[k]][j]]]
+              # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+              points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+              colvect<-append(colvect,cols[classnames[[k]][j]])
+            }
+          for (j in 1:length(classnames[[l]])) {
+          # i=class2names[j]
+          tic <- TIC[[classnames[[l]][j]]]
+          points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
+          colvect<-append(colvect,cols[classnames[[l]][j]])
+          }
+          legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
+        }
+      }
+    }#end if length >2
+    if (length(class)==2){
+        k=1
+		l=2
+        colvect<-NULL
+        plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
+
+       for (j in 1:length(classnames[[k]])) {
+
+          tic <- TIC[[classnames[[k]][j]]]
+          # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+          points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+          colvect<-append(colvect,cols[classnames[[k]][j]])
+        }
+      for (j in 1:length(classnames[[l]])) {
+          # i=class2names[j]
+          tic <- TIC[[classnames[[l]][j]]]
+          points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
+          colvect<-append(colvect,cols[classnames[[l]][j]])
+      }
+      legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
+
+    }#end length ==2
+       if (length(class)==1){
+        k=1
+		ylim = range(sapply(TIC, function(x) range(x[,2])))
+        colvect<-NULL
+        plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
+
+       for (j in 1:length(classnames[[k]])) {
+
+          tic <- TIC[[classnames[[k]][j]]]
+          # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+          points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+          colvect<-append(colvect,cols[classnames[[k]][j]])
+        }
+      
+      legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
+
+    }#end length ==1
+    dev.off()
+
+    # invisible(TIC)
+}
+
+getTIC <- function(file,rtcor=NULL) {
+     object <- xcmsRaw(file)
+     cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity)
+}
+
+##
+##  overlay TIC from all files in current folder or from xcmsSet, create pdf
+##
+getTIC2s <- function(files, pdfname="TICs.pdf", rt=c("raw","corrected")) {
+         
+    #create sampleMetadata, get sampleMetadata and class
+    sampleMetadata<-xcms:::phenoDataFromPaths(files)
+    class<-class<-as.vector(levels(sampleMetadata[,"class"])) #create phenoData like table
+    classnames<-vector("list",length(class))
+    for (i in 1:length(class)){
+        classnames[[i]]<-which( sampleMetadata[,1]==class[i])
+    }
+        
+    N <- dim(sampleMetadata)[1]
+    TIC <- vector("list",N)
+
+    for (i in 1:N) {
+        cat(files[i],"\n")
+        if (!is.null(xcmsSet) && rt == "corrected")
+            rtcor <- xcmsSet@rt$corrected[[i]]
+        else
+            rtcor <- NULL
+        TIC[[i]] <- getTIC(files[i],rtcor=rtcor)
+    }
+ 
+    pdf(pdfname,w=16,h=10)
+    cols <- rainbow(N)
+    lty = 1:N
+    pch = 1:N
+    #search for max x and max y in TICs
+    xlim = range(sapply(TIC, function(x) range(x[,1])))
+    ylim = range(sapply(TIC, function(x) range(x[,2])))
+    ylim = c(-ylim[2], ylim[2])
+	  
+	  
+    ##plot start
+    if (length(class)>2){
+        for (k in 1:(length(class)-1)){
+            for (l in (k+1):length(class)){
+                print(paste(class[k],"vs",class[l],sep=" ")) 
+                plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
+                colvect<-NULL
+                for (j in 1:length(classnames[[k]])) {
+
+                    tic <- TIC[[classnames[[k]][j]]]
+                    # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+                    points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+                    colvect<-append(colvect,cols[classnames[[k]][j]])
+                }
+                for (j in 1:length(classnames[[l]])) {
+                    # i=class2names[j]
+                    tic <- TIC[[classnames[[l]][j]]]
+                    points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
+                    colvect<-append(colvect,cols[classnames[[l]][j]])
+                }
+                legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
+            }
+        }
+    }#end if length >2
+    if (length(class)==2){
+
+
+        k=1
+        l=2
+		
+        plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
+        colvect<-NULL
+        for (j in 1:length(classnames[[k]])) {
+
+            tic <- TIC[[classnames[[k]][j]]]
+            # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+            points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+            colvect<-append(colvect,cols[classnames[[k]][j]])
+        }
+        for (j in 1:length(classnames[[l]])) {
+            # i=class2names[j]
+            tic <- TIC[[classnames[[l]][j]]]
+            points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
+            colvect<-append(colvect,cols[classnames[[l]][j]])
+        }
+        legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
+
+    }#end length ==2
+    if (length(class)==1){
+        k=1
+        ylim = range(sapply(TIC, function(x) range(x[,2])))
+		
+        plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
+        colvect<-NULL
+        for (j in 1:length(classnames[[k]])) {
+
+            tic <- TIC[[classnames[[k]][j]]]
+            # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+            points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+            colvect<-append(colvect,cols[classnames[[k]][j]])
+        }
+
+        legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
+
+    }#end length ==1
+    dev.off()
+
+    # invisible(TIC)
+}
+
+
+##addition for quality control of peak picking
+#metaMS EIC and pspectra plotting option
+#version 20150512
+#only for Galaxy 
+
+plotUnknowns<-function(resGC, unkn=""){
+
+
+    ##Annotation table each value is a pcgrp associated to the unknown 
+    ##NOTE pcgrp index are different between xcmsSet and resGC due to filtering steps in metaMS
+    ##R. Wehrens give me some clues on that and we found a correction
+ 
+    mat<-matrix(ncol=length(resGC$xset), nrow=dim(resGC$PeakTable)[1])
+     
+    for (j in 1: length(resGC$xset)){
+        test<-resGC$annotation[[j]]
+        print(paste("j=",j))
+        for (i in 1:dim(test)[1]){
+            if (as.numeric(row.names(test)[i])>dim(mat)[1]){
+                next
+            } else {
+                mat[as.numeric(row.names(test)[i]),j]<-test[i,1]
+            }
+        }
+    }
+    colnames(mat)<-colnames(resGC$PeakTable[,c((which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI")[length(which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI"))]+1):dim(resGC$PeakTable)[2])])
+    
+    #debug
+
+    # print(dim(mat))
+    # print(mat[1:3,]) 
+    # write.table(mat, file="myannotationtable.tsv", sep="\t", row.names=FALSE)
+    #correction of annotation matrix due to pcgrp removal by quality check in runGCresult
+    #matrix of correspondance between an@pspectra and filtered pspectra from runGC
+
+    allPCGRPs <-
+        lapply(1:length(resGC$xset),
+            function(i) {
+                an <- resGC$xset[[i]]
+                huhn <- an@pspectra[which(sapply(an@pspectra, length) >=
+                metaSetting(resGC$settings,
+                "DBconstruction.minfeat"))]
+                matCORR<-cbind(1:length(huhn), match(huhn, an@pspectra))
+            })
+
+    if (unkn[1]==""){    
+    #plot EIC and spectra for all unknown for comparative purpose
+   
+
+        par (mar=c(5, 4, 4, 2) + 0.1)
+        for (l in 1:dim(resGC$PeakTable)[1]){ #l=2
+            #recordPlot
+            perpage=3 #if change change layout also!
+            num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page
+            my.plots <- vector(num.plots, mode='list')
+            dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf
+            # par(mfrow=c(perpage,2))
+            layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage))
+            # layout.show(6)
+            oma.saved <- par("oma")
+            par(oma = rep.int(0, 4))
+            par(oma = oma.saved)
+            o.par <- par(mar = rep.int(0, 4))
+            on.exit(par(o.par))
+            stop=0 #initialize
+            for (i in 1:num.plots) {
+                start=stop+1
+                stop=start+perpage-1 #
+                for (c in start:stop){
+                    if (c <=dim(mat)[2]){
+                            
+                        #get sample name
+                        sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths)
+
+                        #remove .cdf, .mzXML filepattern
+                        filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", 
+                                "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+                        filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), 
+                                collapse = "|")
+                        sampname<-gsub(filepattern, "",sampname)
+                         
+                        title1<-paste("unknown", l,"from",sampname, sep=" ")
+                        an<-resGC$xset[[c]]
+                         
+                        par (mar=c(0, 0, 0, 0) + 0.1)
+                        plot.new()
+                        box()
+                        text(0.5, 0.5, title1, cex=2)
+                        if (!is.na(mat[l,c])){
+                            pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2]
+                            if (pcgrp!=mat[l,c]) print ("pcgrp changed")
+                            par (mar=c(3, 2.5, 3, 1.5) + 0.1)
+                            plotEICs(an, pspec=pcgrp, maxlabel=2)
+                            plotPsSpectrum(an, pspec=pcgrp, maxlabel=2)
+                        } else {
+                            plot.new()
+                            box()
+                            text(0.5, 0.5, "NOT FOUND", cex=2)
+                            plot.new()
+                            box()
+                            text(0.5, 0.5, "NOT FOUND", cex=2)
+                        }
+                    }
+                 }
+                # my.plots[[i]] <- recordPlot()
+            }
+            graphics.off()
+
+                # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE)
+                # for (my.plot in my.plots) {
+                    # replayPlot(my.plot)
+                # }
+                # my.plots
+                # graphics.off()
+
+        }#end  for l
+    }#end if unkn=""
+    else{
+        par (mar=c(5, 4, 4, 2) + 0.1)
+        l=unkn
+        if (length(l)==1){
+            #recordPlot
+            perpage=3 #if change change layout also!
+            num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page
+            my.plots <- vector(num.plots, mode='list')
+            
+            dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf
+            # par(mfrow=c(perpage,2))
+            layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage))
+            # layout.show(6)
+            oma.saved <- par("oma")
+            par(oma = rep.int(0, 4))
+            par(oma = oma.saved)
+            o.par <- par(mar = rep.int(0, 4))
+            on.exit(par(o.par))
+            stop=0 #initialize
+            for (i in 1:num.plots) {
+                start=stop+1
+                stop=start+perpage-1 #
+                for (c in start:stop){
+                    if (c <=dim(mat)[2]){
+                            
+                        #get sample name
+                        sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths)
+
+                        #remove .cdf, .mzXML filepattern
+                        filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+                        filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
+                        sampname<-gsub(filepattern, "",sampname)
+                         
+                        title1<-paste("unknown", l,"from",sampname, sep=" ")
+                        an<-resGC$xset[[c]]
+                         
+                        par (mar=c(0, 0, 0, 0) + 0.1)
+                        plot.new()
+                        box()
+                        text(0.5, 0.5, title1, cex=2)
+                        if (!is.na(mat[l,c])){
+                            pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2]
+                            if (pcgrp!=mat[l,c]) print ("pcgrp changed")
+                            par (mar=c(3, 2.5, 3, 1.5) + 0.1)
+                            plotEICs(an, pspec=pcgrp, maxlabel=2)
+                            plotPsSpectrum(an, pspec=pcgrp, maxlabel=2)
+                        } else {
+                            plot.new()
+                            box()
+                            text(0.5, 0.5, "NOT FOUND", cex=2)
+                            plot.new()
+                            box()
+                            text(0.5, 0.5, "NOT FOUND", cex=2)
+                        }
+                    }
+                }
+                # my.plots[[i]] <- recordPlot()
+            }
+            graphics.off()
+
+            # pdf(file=paste("Unknown_",l,".pdf", sep=""), onefile=TRUE)
+            # for (my.plot in my.plots) {
+                # replayPlot(my.plot)
+            # }
+            # my.plots
+            # graphics.off()
+        } else {
+            par (mar=c(5, 4, 4, 2) + 0.1)
+            for (l in 1:length(unkn)){ #l=2
+                #recordPlot
+                perpage=3 #if change change layout also!
+                num.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page
+                my.plots <- vector(num.plots, mode='list')
+                dev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",unkn[l],".pdf", sep="")) #A4 pdf
+                # par(mfrow=c(perpage,2))
+                layout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage))
+                # layout.show(6)
+                oma.saved <- par("oma")
+                par(oma = rep.int(0, 4))
+                par(oma = oma.saved)
+                o.par <- par(mar = rep.int(0, 4))
+                on.exit(par(o.par))
+                stop=0 #initialize
+                for (i in 1:num.plots) {
+                    start=stop+1
+                    stop=start+perpage-1 #
+                    for (c in start:stop){
+                        if (c <=dim(mat)[2]){
+                        
+                            #get sample name
+                            sampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths)
+
+                            #remove .cdf, .mzXML filepattern
+                            filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+                            filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
+                            sampname<-gsub(filepattern, "",sampname)
+                             
+                            title1<-paste("unknown",unkn[l],"from",sampname, sep=" ")
+                            an<-resGC$xset[[c]]
+                             
+                            par (mar=c(0, 0, 0, 0) + 0.1)
+                            plot.new()
+                            box()
+                            text(0.5, 0.5, title1, cex=2)
+                            if (!is.na(mat[unkn[l],c])){
+                                pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[unkn[l],c]),2]
+                                if (pcgrp!=mat[unkn[l],c]) print ("pcgrp changed")
+                                par (mar=c(3, 2.5, 3, 1.5) + 0.1)
+                                plotEICs(an, pspec=pcgrp, maxlabel=2)
+                                plotPsSpectrum(an, pspec=pcgrp, maxlabel=2)
+                            } else {
+                                plot.new()
+                                box()
+                                text(0.5, 0.5, "NOT FOUND", cex=2)
+                                plot.new()
+                                box()
+                                text(0.5, 0.5, "NOT FOUND", cex=2)
+                            }
+                        }
+                    }
+                    # my.plots[[i]] <- recordPlot()
+                }
+                graphics.off()
+
+                # pdf(file=paste("Unknown_",unkn[l],".pdf", sep=""), onefile=TRUE)
+                # for (my.plot in my.plots) {
+                    # replayPlot(my.plot)
+                # }
+                # my.plots
+                # graphics.off()
+
+            }#end  for unkn[l]
+        
+        }
+    
+    }
+} #end function 
+
+
+