Mercurial > repos > lecorguille > xcms_retcor

--- a/README.rst	Wed Jul 06 17:43:01 2016 -0400
+++ b/README.rst	Mon Jan 30 08:53:30 2017 -0500
@@ -2,6 +2,10 @@
 Changelog/News
 --------------

+**Version 2.0.8 - 22/12/2016**
+
+- BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph
+
 **Version 2.0.7 - 06/07/2016**

 - UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0
@@ -10,7 +14,6 @@

 - TEST: refactoring to pass planemo test using conda dependencies

-
 **Version 2.0.5 - 10/02/2016**

 - BUGFIX: better management of errors. Datasets remained green although the process failed
@@ -37,5 +40,3 @@
 Planemo test using source env.sh: passed

 Planemo shed_test : passed
-
-
--- a/abims_xcms_retcor.xml	Wed Jul 06 17:43:01 2016 -0400
+++ b/abims_xcms_retcor.xml	Mon Jan 30 08:53:30 2017 -0500
@@ -1,4 +1,4 @@
-<tool id="abims_xcms_retcor" name="xcms.retcor" version="2.0.7">
+<tool id="abims_xcms_retcor" name="xcms.retcor" version="2.0.8">

     <description>Retention Time Correction using retcor function from xcms R package </description>

@@ -10,9 +10,9 @@
     <expand macro="stdio"/>

     <command><![CDATA[
-        @COMMAND_XCMS_SCRIPT@
-        image $image
-        xfunction retcor
+        @COMMAND_XCMS_SCRIPT@
+        image $image
+        xfunction retcor

         xsetRdataOutput $xsetRData
         ticspdf $ticsCorPdf
@@ -35,7 +35,7 @@

         @COMMAND_ZIPFILE_LOAD@

-        @COMMAND_LOG_EXIT@
+        @COMMAND_LOG_EXIT@
     ]]></command>

     <inputs>
@@ -47,8 +47,8 @@
             </param>
             <when value="obiwarp">
                 <param name="profStep" type="float" value="1" label="Step size (in m/z)" help="[profStep] to use for profile generation from the raw data files" />
-            </when>
-            <when value="peakgroups">
+            </when>
+            <when value="peakgroups">
                 <param name="smooth" type="select" label="Smooth method" help="[smooth] either 'loess’ for non-linear alignment or ‘linear’ for linear alignment" >
                     <option value="loess">loess</option>
                     <option value="linear">linear</option>
@@ -63,7 +63,7 @@
                     </param>
                     <when value="show">
                         <param name="span" type="float" value="0.2" label="Degree of smoothing for local polynomial regression fitting" help="[span]"/>
-
+
                         <param name="family" type="select" label="Family" help="[family] if gaussian fitting is by least-squares with no outlier removal, and if symmetric a re descending M estimator is used with Tukey's biweight function, allowing outlier removal">
                             <option value="gaussian" selected="true">gaussian</option>
                             <option value="symmetric">symmetric</option>
@@ -98,7 +98,7 @@
         <data name="bpcsCorPdf"   format="pdf" label="${image.name[:-6]}.retcor.BPCs_corrected.pdf" />
         <data name="log" format="txt" label="xset.log.txt"  hidden="true" />
     </outputs>
-
+
     <tests>
         <!--<test>
             <param name="image" value="xset.group.RData"/>
@@ -149,7 +149,7 @@
     </tests>

     <help><![CDATA[
-
+
 @HELP_AUTHORS@

 ===========
@@ -160,12 +160,12 @@
 Description
 -----------

-After matching peaks into groups, xcms can use those groups to identify and correct
-correlated drifts in retention time from run to run. The aligned peaks can then be
-used for a second pass of peak grouping which will be more accurate than the first.
-The whole process can be repeated in an iterative fashion. Not all peak groups will be helpful
-for identifying retention time drifts. Some groups may be missing peaks from a large
-fraction of samples and thus provide an incomplete picture of the drift at that time point.
+After matching peaks into groups, xcms can use those groups to identify and correct
+correlated drifts in retention time from run to run. The aligned peaks can then be
+used for a second pass of peak grouping which will be more accurate than the first.
+The whole process can be repeated in an iterative fashion. Not all peak groups will be helpful
+for identifying retention time drifts. Some groups may be missing peaks from a large
+fraction of samples and thus provide an incomplete picture of the drift at that time point.
 Still others may contain multiple peaks from the same sample, which is a sign of impropper grouping.

 .. class:: warningmark
@@ -250,15 +250,15 @@
 xset.group.retcor.RData: rdata.xcms.retcor format

     | Rdata file that will be necessary in the **xcms.group** step of the workflow.
-
-
+
+
 ------

-.. class:: infomark
+.. class:: infomark

 The output file is an xset.retcor.RData file. You can continue your analysis using it in **xcms.fillPeaks** tool.

-
+
 ---------------------------------------------------

 ---------------
@@ -281,7 +281,7 @@
     | span -> **0.2**
     | family -> **gaussian**
     | plottype -> **deviation**
-
+

 Output files
 ------------
@@ -298,6 +298,10 @@
 Changelog/News
 --------------

+**Version 2.0.8 - 22/12/2016**
+
+- BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph
+
 **Version 2.0.7 - 06/07/2016**

 - UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0
--- a/lib.r	Wed Jul 06 17:43:01 2016 -0400
+++ b/lib.r	Mon Jan 30 08:53:30 2017 -0500
@@ -1,14 +1,59 @@
-# lib.r version="2.0.1"
 #Authors ABiMS TEAM
-#Lib.r for Galaxy Workflow4Metabo
+#Lib.r for Galaxy Workflow4Metabolomics xcms tools
+#
+#version 2.4: lecorguille
+#   add getPeaklistW4M
+#version 2.3: yguitton
+#   correction for empty PDF when only 1 class
 #version 2.2
-#Based on lib.r 2.1
-#Modifications made by Guitton Yann
-#correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
-#Note if scanrange is used a warning is prompted in R console but do not stop PDF generation
+#   correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
+#   Note if scanrange is used a warning is prompted in R console but do not stop PDF generation
+#version 2.1: yguitton
+#   Modifications made by Guitton Yann


+#@author G. Le Corguille
+#This function convert if it is required the Retention Time in minutes
+RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
+    if (convertRTMinute){
+        #converting the retention times (seconds) into minutes
+        print("converting the retention times into minutes in the variableMetadata")
+        variableMetadata[,"rt"]=variableMetadata[,"rt"]/60
+        variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60
+        variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60
+    }
+    return (variableMetadata)
+}

+#@author G. Le Corguille
+#This function format ions identifiers
+formatIonIdentifiers <- function(dataData, numDigitsRT=0, numDigitsMZ=0) {
+    return(make.unique(paste0("M",round(dataData[,"mz"],numDigitsMZ),"T",round(dataData[,"rt"],numDigitsRT))))
+}
+
+#@author G. Le Corguille
+# value: intensity values to be used into, maxo or intb
+getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) {
+    groups <- xset@groups
+    values <- groupval(xset, "medret", value=intval)
+
+    # renamming of the column rtmed to rt to fit with camera peaklist function output
+    colnames(groups)[colnames(groups)=="rtmed"] <- "rt"
+    colnames(groups)[colnames(groups)=="mzmed"] <- "mz"
+
+    ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
+    groups = RTSecondToMinute(groups, convertRTMinute)
+
+    rownames(groups) = ids
+    rownames(values) = ids
+
+    #@TODO: add "name" as the first column name
+    #colnames(groups)[1] = "name"
+    #colnames(values)[1] = "name"
+
+    write.table(groups, file=variableMetadataOutput,sep="\t",quote=F,row.names = T,col.names = NA)
+    write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA)
+}

 #@author Y. Guitton
 getBPC <- function(file,rtcor=NULL, ...) {
@@ -44,13 +89,13 @@
   for (j in 1:N) {

     TIC[[j]] <- getBPC(files[j])
-    #good for raw
+    #good for raw
     # seems strange for corrected
     #errors if scanrange used in xcmsSetgeneration
     if (!is.null(xcmsSet) && rt == "corrected")
     rtcor <- xcmsSet@rt$corrected[[j]] else
     rtcor <- NULL
-
+
     TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
     # TIC[[j]][,1]<-rtcor
   }
@@ -68,11 +113,11 @@


   ##plot start
-
+
   if (length(class)>2){
     for (k in 1:(length(class)-1)){
       for (l in (k+1):length(class)){
-        #print(paste(class[k],"vs",class[l],sep=" "))
+        #print(paste(class[k],"vs",class[l],sep=" "))
         plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
         colvect<-NULL
         for (j in 1:length(classnames[[k]])) {
@@ -115,6 +160,24 @@

   }#end length ==2

+  #case where only one class
+  if (length(class)==1){
+    k=1
+		ylim = range(sapply(TIC, function(x) range(x[,2])))
+    colvect<-NULL
+    plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
+
+    for (j in 1:length(classnames[[k]])) {
+      tic <- TIC[[classnames[[k]][j]]]
+      # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+      points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+      colvect<-append(colvect,cols[classnames[[k]][j]])
+    }
+
+    legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
+
+  }#end length ==1
+
   dev.off() #pdf(pdfname,w=16,h=10)

   invisible(TIC)
@@ -153,7 +216,7 @@
   for (i in 1:length(class)){
     classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i])
   }
-
+
   N <- length(files)
   TIC <- vector("list",N)

@@ -178,7 +241,7 @@
   if (length(class)>2){
     for (k in 1:(length(class)-1)){
       for (l in (k+1):length(class)){
-        #print(paste(class[k],"vs",class[l],sep=" "))
+        #print(paste(class[k],"vs",class[l],sep=" "))
         plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
         colvect<-NULL
         for (j in 1:length(classnames[[k]])) {
@@ -219,6 +282,25 @@
     legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)

   }#end length ==2
+
+  #case where only one class
+  if (length(class)==1){
+	  k=1
+	  ylim = range(sapply(TIC, function(x) range(x[,2])))
+
+	  plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
+    colvect<-NULL
+		for (j in 1:length(classnames[[k]])) {
+      tic <- TIC[[classnames[[k]][j]]]
+			# points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+			points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+      colvect<-append(colvect,cols[classnames[[k]][j]])
+	  }
+
+		legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
+
+	}#end length ==1
+
   dev.off() #pdf(pdfname,w=16,h=10)

   invisible(TIC)
@@ -237,7 +319,7 @@
   sampleMetadata=xset@phenoData
   sampleNamesOrigin=rownames(sampleMetadata)
   sampleNamesMakeNames=make.names(sampleNamesOrigin)
-
+
   if (any(duplicated(sampleNamesMakeNames))) {
     write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
     for (sampleName in sampleNamesOrigin) {
@@ -285,7 +367,7 @@

       #Set the polarity attribute
       sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity
-
+
       #Delete xcmsRaw object because it creates a bug for the fillpeaks step
       rm(xcmsRaw)
     }
@@ -321,7 +403,7 @@
   filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]

   # COMPARISON
-  if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
+  if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
     write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
     write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
     stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
@@ -347,7 +429,7 @@
     write(capture, stderr())
     stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
   }
-
+
 }


@@ -359,7 +441,7 @@
   cat("Checking Non ASCII characters in the XML...\n")

   processed=F
-  l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE)
+  l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE)
   for (i in l){
     cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="")
     capture=suppressWarnings(system(cmd,intern=TRUE))
@@ -368,7 +450,7 @@
       print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
       c=system(cmd,intern=TRUE)
       capture=""
-      processed=T
+      processed=T
     }
   }
   if (processed) cat("\n\n")
@@ -376,7 +458,7 @@
 }


-##
+##
 ## This function will compute MD5 checksum to check the data integrity
 ##
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
@@ -397,4 +479,3 @@

   return(as.matrix(md5sum(files)))
 }
-
--- a/macros.xml	Wed Jul 06 17:43:01 2016 -0400
+++ b/macros.xml	Mon Jan 30 08:53:30 2017 -0500
@@ -2,7 +2,6 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="3.1.2">R</requirement>
             <requirement type="package" version="0.4_1">r-snow</requirement>
             <requirement type="package" version="1.46.0">bioconductor-xcms</requirement>
             <requirement type="package" version="1.1_4">r-batch</requirement>
@@ -40,7 +39,7 @@
         <conditional name="zipfile_load_conditional">
             <param name="zipfile_load_select" type="select" label="Resubmit your zip file" help="Use only if you get a message which say that your original zip file have been deleted on the server." >
                 <option value="no" >no need</option>
-                <option value="yes" selected="peakgroups">yes</option>
+                <option value="yes">yes</option>
             </param>
             <when value="no">
             </when>
--- a/xcms.r	Wed Jul 06 17:43:01 2016 -0400
+++ b/xcms.r	Mon Jan 30 08:53:30 2017 -0500
@@ -19,7 +19,7 @@
   cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="")
 }
 source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) }
-cat("\n\n");
+cat("\n\n");


@@ -64,18 +64,35 @@
   xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL
 }

+#saving the specific parameters
 rplotspdf = "Rplots.pdf"
 if (!is.null(listArguments[["rplotspdf"]])){
   rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL
 }
-
 sampleMetadataOutput = "sampleMetadata.tsv"
 if (!is.null(listArguments[["sampleMetadataOutput"]])){
   sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL
 }
-
-
-
+variableMetadataOutput = "variableMetadata.tsv"
+if (!is.null(listArguments[["variableMetadataOutput"]])){
+  variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL
+}
+dataMatrixOutput = "dataMatrix.tsv"
+if (!is.null(listArguments[["dataMatrixOutput"]])){
+  dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL
+}
+if (!is.null(listArguments[["convertRTMinute"]])){
+  convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL
+}
+if (!is.null(listArguments[["numDigitsMZ"]])){
+  numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL
+}
+if (!is.null(listArguments[["numDigitsRT"]])){
+  numDigitsRT = listArguments[["numDigitsRT"]]; listArguments[["numDigitsRT"]]=NULL
+}
+if (!is.null(listArguments[["intval"]])){
+  intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL
+}

 if (thefunction %in% c("xcmsSet","retcor")) {
   ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL
@@ -116,15 +133,15 @@
     suppressWarnings(unzip(zipfile, unzip="unzip"))

     #get the directory name
-    filesInZip=unzip(zipfile, list=T);
+    filesInZip=unzip(zipfile, list=T);
     directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])));
     directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
     directory = "."
     if (length(directories) == 1) directory = directories
-
+
     cat("files_root_directory\t",directory,"\n")

-    #
+    #
     md5sumList=list("origin"=getMd5sum(directory))

     # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files.
@@ -187,6 +204,8 @@


 #execution of the function "thefunction" with the parameters given in "listArguments"
+
+cat("\t\tCOMPUTE\n")
 xset = do.call(thefunction, listArguments)


@@ -200,7 +219,7 @@
   xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths)

   if(exists("zipfile") && (zipfile!="")) {
-
+
     #Modify the samples names (erase the path)
     for(i in 1:length(sampnames(xset))){

@@ -217,17 +236,24 @@

 # -- TIC --
 if (thefunction == "xcmsSet") {
+  cat("\t\tGET TIC GRAPH\n")
   sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput)
   getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw")
   getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf)
 } else if (thefunction == "retcor") {
+  cat("\t\tGET TIC GRAPH\n")
   getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected")
   getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf)
 }

+if (thefunction == "fillPeaks") {
+  cat("\t\tGET THE PEAK LIST\n")
+  getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput)
+}
+
+
 cat("\n\n")

-
 # ----- EXPORT -----

 cat("\tXSET OBJECT INFO\n")
@@ -243,4 +269,3 @@


 cat("\tDONE\n")
-