xcms_fillpeaks: lib.r comparison

comparison lib.r @ 9:ee29f0a6e361 draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 08e7f269a5c59687a7768be8db5fcb4e4d736093

author	lecorguille
date	Mon, 30 Jan 2017 08:53:48 -0500
parents	2edfa5e1f719
children	de0d85537ee3

comparison

equal deleted inserted replaced

-:cce30b3398e0
+:ee29f0a6e361
-# lib.r version="2.0.1"
 #Authors ABiMS TEAM
-#Lib.r for Galaxy Workflow4Metabo
+#Lib.r for Galaxy Workflow4Metabolomics xcms tools
+#
+#version 2.4: lecorguille
+#   add getPeaklistW4M
+#version 2.3: yguitton
+#   correction for empty PDF when only 1 class
 #version 2.2
-#Based on lib.r 2.1
+#   correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
-#Modifications made by Guitton Yann
+#   Note: if scanrange is used, a warning is shown in the R console but it does not stop PDF generation
-#correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
+#version 2.1: yguitton
-#Note if scanrange is used a warning is prompted in R console but do not stop PDF generation
+#   Modifications made by Guitton Yann
+#@author G. Le Corguille
+# Convert the retention-time columns of a variableMetadata table from seconds
+# to minutes, but only when the caller asks for it.
+#   variableMetadata: table indexed by the columns "rt", "rtmin" and "rtmax"
+#   convertRTMinute:  logical flag; when TRUE the three columns are divided by 60
+# Returns variableMetadata, converted or untouched depending on the flag.
+RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
+  # Nothing to do when no conversion was requested.
+  if (!convertRTMinute) {
+    return(variableMetadata)
+  }
+  print("converting the retention times into minutes in the variableMetadata")
+  # Same column order as before: rt, then rtmin, then rtmax.
+  for (rtColumn in c("rt", "rtmin", "rtmax")) {
+    variableMetadata[, rtColumn] <- variableMetadata[, rtColumn] / 60
+  }
+  return(variableMetadata)
+}
+#@author G. Le Corguille
+# Build one identifier per row of dataData, shaped "M<mz>T<rt>".
+#   dataData:    table indexed by the columns "mz" and "rt"
+#   numDigitsRT: number of decimals kept when rounding "rt"
+#   numDigitsMZ: number of decimals kept when rounding "mz"
+# Returns a character vector; make.unique() disambiguates repeated identifiers.
+formatIonIdentifiers <- function(dataData, numDigitsRT=0, numDigitsMZ=0) {
+  roundedMz <- round(dataData[, "mz"], numDigitsMZ)
+  roundedRt <- round(dataData[, "rt"], numDigitsRT)
+  ionNames <- paste0("M", roundedMz, "T", roundedRt)
+  return(make.unique(ionNames))
+}
+#@author G. Le Corguille
+# Write the variableMetadata and dataMatrix tables of an xcmsSet as
+# tab-separated files.
+#   xset:                   object whose @groups slot and groupval() values are exported
+#   intval:                 intensity value passed to groupval(): into, maxo or intb
+#   convertRTMinute:        when TRUE, rt/rtmin/rtmax are written in minutes
+#   numDigitsMZ:            decimals kept for mz in the ion identifiers
+#   numDigitsRT:            decimals kept for rt in the ion identifiers
+#   variableMetadataOutput: destination file of the group table
+#   dataMatrixOutput:       destination file of the intensity table
+getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) {
+  variableMetadata <- xset@groups
+  dataMatrix <- groupval(xset, "medret", value=intval)
+
+  # rename rtmed/mzmed to rt/mz to fit with the camera peaklist function output
+  groupColumns <- colnames(variableMetadata)
+  groupColumns[groupColumns == "rtmed"] <- "rt"
+  groupColumns[groupColumns == "mzmed"] <- "mz"
+  colnames(variableMetadata) <- groupColumns
+
+  # The identifiers are built before the optional conversion, so they are
+  # derived from the unconverted rt values.
+  ionIds <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
+  variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
+
+  rownames(variableMetadata) <- ionIds
+  rownames(dataMatrix) <- ionIds
+
+  #@TODO: add "name" as the first column name
+  #colnames(variableMetadata)[1] = "name"
+  #colnames(dataMatrix)[1] = "name"
+
+  # col.names = NA emits an empty header cell above the row-name column.
+  write.table(variableMetadata, file = variableMetadataOutput, sep = "\t", quote = FALSE, row.names = TRUE, col.names = NA)
+  write.table(dataMatrix, file = dataMatrixOutput, sep = "\t", quote = FALSE, row.names = TRUE, col.names = NA)
+}
 #@author Y. Guitton
 getBPC <- function(file,rtcor=NULL, ...) {
 object <- xcmsRaw(file)
 sel <- profRange(object, ...)
 for (j in 1:N) {
 TIC[[j]] <- getBPC(files[j])
 #good for raw
 # seems strange for corrected
 #errors if scanrange used in xcmsSetgeneration
 if (!is.null(xcmsSet) && rt == "corrected")
 rtcor <- xcmsSet@rt$corrected[[j]] else
 rtcor <- NULL
 TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
 # TIC[[j]][,1]<-rtcor
 }
 ylim = range(sapply(TIC, function(x) range(x[,2])))
 ylim = c(-ylim[2], ylim[2])
 ##plot start
 if (length(class)>2){
 for (k in 1:(length(class)-1)){
 for (l in (k+1):length(class)){
 #print(paste(class[k],"vs",class[l],sep=" "))
 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
 colvect<-NULL
 for (j in 1:length(classnames[[k]])) {
 tic <- TIC[[classnames[[k]][j]]]
 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
 }
 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
 }#end length ==2
+#case where only one class
+if (length(class)==1){
+k=1
+		ylim = range(sapply(TIC, function(x) range(x[,2])))
+colvect<-NULL
+plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
+for (j in 1:length(classnames[[k]])) {
+tic <- TIC[[classnames[[k]][j]]]
+# points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+colvect<-append(colvect,cols[classnames[[k]][j]])
+}
+legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
+}#end length ==1
 dev.off() #pdf(pdfname,w=16,h=10)
 invisible(TIC)
 }
 classnames<-vector("list",length(class))
 for (i in 1:length(class)){
 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i])
 }
 N <- length(files)
 TIC <- vector("list",N)
 for (i in 1:N) {
 if (!is.null(xcmsSet) && rt == "corrected")
 ##plot start
 if (length(class)>2){
 for (k in 1:(length(class)-1)){
 for (l in (k+1):length(class)){
 #print(paste(class[k],"vs",class[l],sep=" "))
 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
 colvect<-NULL
 for (j in 1:length(classnames[[k]])) {
 tic <- TIC[[classnames[[k]][j]]]
 colvect<-append(colvect,cols[classnames[[l]][j]])
 }
 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
 }#end length ==2
+#case where only one class
+if (length(class)==1){
+	  k=1
+	  ylim = range(sapply(TIC, function(x) range(x[,2])))
+	  plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
+colvect<-NULL
+		for (j in 1:length(classnames[[k]])) {
+tic <- TIC[[classnames[[k]][j]]]
+			# points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+			points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+colvect<-append(colvect,cols[classnames[[k]][j]])
+	  }
+		legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
+	}#end length ==1
 dev.off() #pdf(pdfname,w=16,h=10)
 invisible(TIC)
 }
 #Create the sampleMetada dataframe
 sampleMetadata=xset@phenoData
 sampleNamesOrigin=rownames(sampleMetadata)
 sampleNamesMakeNames=make.names(sampleNamesOrigin)
 if (any(duplicated(sampleNamesMakeNames))) {
 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
 for (sampleName in sampleNamesOrigin) {
 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
 }
 library(tools)
 samplename=file_path_sans_ext(filename)
 #Set the polarity attribute
 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity
 #Delete xcmsRaw object because it creates a bug for the fillpeaks step
 rm(xcmsRaw)
 }
 }
 # WHAT IS ON THE FILESYSTEM
 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
 # COMPARISON
 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
 }
 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
 write(capture, stderr())
 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
 }
 }
 ##
 ## Strip non-ASCII characters from the XML-like files found under a directory.
 ##
 ## Files are located with `find` (case-insensitive name pattern '*.*ml*',
 ## skipping dot-files and anything under a conda-env path). Each file is
 ## screened with `grep '[^ -~]'`; files that match are rewritten in place by
 ## `perl -i`, which deletes every non-ASCII character.
 ##
 ##   directory: directory to scan
 ## Returns TRUE when at least one file was rewritten, FALSE otherwise.
 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
 deleteXmlBadCharacters<- function (directory) {
   cat("Checking Non ASCII characters in the XML...\n")
   processed <- FALSE
   # Every path handed to the shell goes through shQuote(): without it the
   # perl call split paths containing spaces and left those files uncleaned.
   findCmd <- paste("find", shQuote(directory), "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'")
   xmlFiles <- system(findCmd, intern=TRUE)
   for (xmlFile in xmlFiles) {
     grepCmd <- paste("LC_ALL=C grep '[^ -~]'", shQuote(xmlFile))
     # grep exits non-zero when nothing matches; system(intern=TRUE) reports
     # that as a warning, which is expected here and therefore silenced.
     matchedLines <- suppressWarnings(system(grepCmd, intern=TRUE))
     if (length(matchedLines) > 0) {
       perlCmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'", shQuote(xmlFile))
       print( paste("WARNING: Non ASCII characters have been removed from the ",xmlFile,"file") )
       # intern=TRUE keeps any perl output out of the console, as before.
       system(perlCmd, intern=TRUE)
       processed <- TRUE
     }
   }
   if (processed) cat("\n\n")
   return(processed)
 }
 ##
 ## This function will compute MD5 checksum to check the data integrity
 ##
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
 getMd5sum <- function (directory) {
 # Announce the step on standard output.
 cat("Compute md5 checksum...\n")
 # NOTE(review): neither an assignment to `files` nor any use of `directory`
 # appears in the lines visible here; this view looks elided, so confirm
 # against the full file where `files` comes from.
 #cat("\n\n")
 # Return the md5sum() result for `files`, converted to a matrix.
 # presumably tools::md5sum -- verify that the tools package is loaded.
 return(as.matrix(md5sum(files)))
 }

Mercurial > repos > lecorguille > xcms_fillpeaks

comparison lib.r @ 9:ee29f0a6e361 draft