Mercurial > repos > lecorguille > xcms_retcor

--- a/README.rst	Mon Feb 22 16:38:15 2016 -0500
+++ b/README.rst	Fri Apr 08 10:39:32 2016 -0400
@@ -2,6 +2,11 @@
 Changelog/News
 --------------

+**Version 2.0.6 - 04/04/2016**
+
+- TEST: refactoring to pass planemo test using conda dependencies
+
+
 **Version 2.0.5 - 10/02/2016**

 - BUGFIX: better management of errors. Datasets remained green although the process failed
@@ -19,3 +24,14 @@

 - IMPROVEMENT: parameter labels have changed to facilitate their reading.

+
+Test Status
+-----------
+
+Planemo test using conda: passed
+
+Planemo test using source env.sh: passed
+
+Planemo shed_test : passed
+
+
--- a/abims_xcms_retcor.xml	Mon Feb 22 16:38:15 2016 -0500
+++ b/abims_xcms_retcor.xml	Fri Apr 08 10:39:32 2016 -0400
@@ -1,20 +1,16 @@
-<tool id="abims_xcms_retcor" name="xcms.retcor" version="2.0.5">
+<tool id="abims_xcms_retcor" name="xcms.retcor" version="2.0.6">

     <description>Retention Time Correction using retcor function from xcms R package </description>

-    <requirements>
-        <requirement type="package" version="3.1.2">R</requirement>
-        <requirement type="binary">Rscript</requirement>
-        <requirement type="package" version="1.44.0">xcms</requirement>
-        <requirement type="package" version="2.2.0">xcms_w4m_script</requirement>
-    </requirements>
+    <macros>
+        <import>macros.xml</import>
+    </macros>

-    <stdio>
-        <exit_code range="1:" level="fatal" />
-    </stdio>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>

     <command><![CDATA[
-        xcms.r
+        @COMMAND_XCMS_SCRIPT@
         image $image
         xfunction retcor

@@ -36,15 +32,10 @@
                 plottype $methods.options.plottype
             #end if
         #end if
-        ###if $zip_file:
-        ##    zipfile $zip_file
-        ###end if
-        ;
-        return=\$?;
-        mv log.txt $log;
-        cat $log;
-        sh -c "exit \$return"
-
+        #if $zip_file:
+            zipfile $zip_file
+        #end if
+        @COMMAND_LOG_EXIT@
     ]]></command>

     <inputs>
@@ -64,6 +55,7 @@
                 </param>
                 <param name="extra" type="integer" value="1" label="Number of extra peaks to allow in retention time correction correction groups" help="[extra]" />
                 <param name="missing" type="integer" value="1" label="Number of missing samples to allow in retention time correction groups" help="[missing]" />
+
                 <conditional name="options">
                     <param name="option" type="select" label="Advanced options">
                         <option value="show">show</option>
@@ -82,14 +74,15 @@
                             <option value="deviation">deviation</option>
                             <option value="mdevden">mdevden</option>
                         </param>
+
                     </when>
                     <when value="hide">
                     </when>
                 </conditional>
             </when>
         </conditional>
-        <!-- To pass planemo test -->
-        <!--<param name="zip_file" type="hidden_data" format="no_unzip.zip" label="Zip file" />-->
+	<!-- To pass planemo test -->
+        <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file" help="Use only if you get a message which say that your original zip file have been deleted on the server." />
     </inputs>

     <outputs>
@@ -108,25 +101,21 @@
     <tests>
         <test>
             <param name="image" value="xset.group.RData"/>
-            <param name="methods.method" value="peakgroups"/>
-            <param name="methods.smooth" value="loess"/>
-            <param name="methods.extra" value="1"/>
-            <param name="methods.missing" value="1"/>
-            <param name="methods.options.option" value="show"/>
-            <param name="methods.options.span" value="0.2"/>
-            <param name="methods.options.family" value="gaussian"/>
-            <param name="methods.options.plottype" value="deviation"/>
-            <param name="zip_file" value="sacuri.zip"/>
-            <!--<output name="xsetRData" file="xset.group.retcor.RData" />-->
-            <!--<output name="rplotsPdf" file="xset.group.retcor.Rplots.pdf" />-->
-            <!--<output name="ticsCorPdf" file="xset.group.retcor.TICs_corrected.pdf" />-->
-            <!--<output name="bpcsCorPdf" file="xset.group.retcor.BPCs_corrected.pdf" />-->
+            <param name="methods|method" value="peakgroups"/>
+            <param name="methods|smooth" value="loess"/>
+            <param name="methods|extra" value="1"/>
+            <param name="methods|missing" value="1"/>
+            <param name="methods|options|option" value="show"/>
+            <param name="methods|options|span" value="0.2"/>
+            <param name="methods|options|family" value="gaussian"/>
+            <param name="methods|options|plottype" value="deviation"/>
+            <param name="zip_file" value="sacuri_dir_root.zip"  ftype="zip" />
             <output name="log">
                 <assert_contents>
-                    <has_text text="object with 9 samples" />
-                    <has_text text="Time range: 0.7-1139.9 seconds (0-19 minutes)" />
-                    <has_text text="Mass range: 50.0019-999.9863 m/z" />
-                    <has_text text="Peaks: 135846 (about 15094 per sample)" />
+                    <has_text text="object with 4 samples" />
+                    <has_text text="Time range: 0.2-1140.1 seconds (0-19 minutes)" />
+                    <has_text text="Mass range: 50.0021-999.9863 m/z" />
+                    <has_text text="Peaks: 59359 (about 14840 per sample)" />
                     <has_text text="Peak Groups: 0" />
                     <has_text text="Sample classes: bio, blank" />
                 </assert_contents>
@@ -135,21 +124,8 @@
     </tests>

     <help><![CDATA[
-

-.. class:: infomark
-
-**Authors**  Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu
-
-.. class:: infomark
-
-**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@univ-nantes.fr - part of Workflow4Metabolomics.org [W4M]
-
- | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.
-
-
-
----------------------------------------------------
+@HELP_AUTHORS@

 ===========
 Xcms.retcor
@@ -297,6 +273,11 @@
 Changelog/News
 --------------

+**Version 2.0.6 - 04/04/2016**
+
+- TEST: refactoring to pass planemo test using conda dependencies
+
+
 **Version 2.0.5 - 10/02/2016**

 - BUGFIX: better management of errors. Datasets remained green although the process failed
@@ -317,9 +298,7 @@

     ]]></help>

-    <citations>
-        <citation type="doi">10.1021/ac051437y</citation>
-        <citation type="doi">10.1093/bioinformatics/btu813</citation>
-    </citations>
+
+    <expand macro="citation" />

 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib.r	Fri Apr 08 10:39:32 2016 -0400
@@ -0,0 +1,400 @@
+# lib.r version="2.0.1"
+#Authors ABiMS TEAM
+#Lib.r for Galaxy Workflow4Metabo
+#version 2.2
+#Based on lib.r 2.1
+#Modifications made by Guitton Yann
+#correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
+#Note: if scanrange is used, a warning is shown in the R console but it does not stop the PDF generation
+
+
+
+
+#@author Y. Guitton
+getBPC <- function(file,rtcor=NULL, ...) {
+  # Build the base peak chromatogram of one raw file as a two-column matrix:
+  # column 1 holds the retention times, column 2 the per-scan maximum of the profile matrix.
+  #   file  : path of the raw file, handed to xcmsRaw()
+  #   rtcor : optional retention times used instead of the scan times of the file
+  #   ...   : forwarded to profRange() to restrict the mass/scan selection
+  raw <- xcmsRaw(file)
+  picked <- profRange(raw, ...)
+
+  # Fall back to the scan times of the file when no corrected times were supplied
+  if (is.null(rtcor)) {
+    times <- raw@scantime[picked$scanidx]
+  } else {
+    times <- rtcor
+  }
+
+  intensities <- xcms:::colMax(raw@env$profile[picked$massidx,picked$scanidx,drop=FALSE])
+
+  # deparse.level=0 keeps the result free of column names, as before
+  cbind(times, intensities, deparse.level=0)
+}
+
+#@author Y. Guitton
+getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) {
+  # Draw the base peak chromatograms (BPC) of all the samples of an xcmsSet into a PDF file.
+  # One page is produced per pair of sample classes: the samples of the first class are drawn
+  # upwards and the samples of the second class are mirrored downwards.
+  #
+  #   xcmsSet   : xcmsSet object (mandatory, the function stops when it is NULL)
+  #   pdfname   : path of the PDF file to create
+  #   rt        : "raw" uses the scan times of each file, "corrected" uses xcmsSet@rt$corrected
+  #   scanrange : kept in the signature for backward compatibility; not used in this function
+  #
+  # Returns (invisibly) the list of per-sample matrices produced by getBPC().
+  cat("Creating BIC pdf...\n")
+
+  if (is.null(xcmsSet)) {
+    cat("Enter an xcmsSet \n")
+    stop("getBPCs: an xcmsSet object is required")
+  }
+
+  # Reduce the default c("raw","corrected") to a single value so that the test below is a scalar
+  rt <- match.arg(rt)
+
+  files <- filepaths(xcmsSet)
+
+  class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
+
+  # For each class, the indices of the samples which belong to it
+  classnames<-lapply(class, function(cl) which( xcmsSet@phenoData[,1]==cl))
+
+  N <- dim(phenoData(xcmsSet))[1]
+
+  # Each raw file is read exactly once (it used to be loaded twice, the first result being discarded)
+  TIC <- lapply(seq_len(N), function(j) {
+    rtcor <- if (rt == "corrected") xcmsSet@rt$corrected[[j]] else NULL
+    getBPC(files[j],rtcor=rtcor)
+  })
+
+  pdf(pdfname,w=16,h=10)
+  # Make sure the PDF device is closed even when one of the plotting calls fails
+  pdfDevice <- dev.cur()
+  on.exit(dev.off(pdfDevice), add=TRUE)
+
+  cols <- rainbow(N)
+  lty = 1:N
+  pch = 1:N
+  #search for max x and max y in BPCs
+  xlim = range(sapply(TIC, function(x) range(x[,1])))
+  ylim = range(sapply(TIC, function(x) range(x[,2])))
+  ylim = c(-ylim[2], ylim[2])
+
+  # Draw one page comparing class k (upwards) with class l (downwards).
+  # "vs" is the separator inserted between the two class names in the title.
+  plotPair <- function(k, l, vs) {
+    plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],vs,class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
+    colvect<-NULL
+    # Iterating over the indices themselves is safe when a class level has no sample
+    for (s in classnames[[k]]) {
+      tic <- TIC[[s]]
+      points(tic[,1]/60, tic[,2], col = cols[s], pch = pch[s], type="l")
+      colvect<-append(colvect,cols[s])
+    }
+    for (s in classnames[[l]]) {
+      tic <- TIC[[s]]
+      points(tic[,1]/60, -tic[,2], col = cols[s], pch = pch[s], type="l")
+      colvect<-append(colvect,cols[s])
+    }
+    legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
+  }
+
+  ##plot start
+  if (length(class)>2){
+    # every pair of classes gets its own page
+    for (k in 1:(length(class)-1)){
+      for (l in (k+1):length(class)){
+        plotPair(k, l, " vs ")
+      }
+    }
+  } else if (length(class)==2){
+    # the two-class title historically has no space around "vs"
+    plotPair(1, 2, "vs")
+  }
+
+  invisible(TIC)
+}
+
+
+
+#@author Y. Guitton
+getTIC <- function(file,rtcor=NULL) {
+  # Build the total ion chromatogram of one raw file as a two-column matrix:
+  # column 1 holds the retention times, column 2 the intensities returned by rawEIC()
+  # over the whole m/z range of the file.
+  #   file  : path of the raw file, handed to xcmsRaw()
+  #   rtcor : optional retention times used instead of the scan times of the file
+  raw <- xcmsRaw(file)
+
+  times <- rtcor
+  if (is.null(times)) times <- raw@scantime
+
+  eic <- rawEIC(raw,mzrange=range(raw@env$mz))
+
+  # deparse.level=0 keeps the result free of column names, as before
+  cbind(times, eic$intensity, deparse.level=0)
+}
+
+##
+##  overlay TIC from all files in current folder or from xcmsSet, create pdf
+##
+#@author Y. Guitton
+getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) {
+  # Draw the total ion chromatograms (TIC) of a set of raw files into a PDF file.
+  #
+  #   xcmsSet : xcmsSet object; when given, its files are used and one page is drawn per pair of
+  #             sample classes (first class upwards, second class mirrored downwards)
+  #   files   : files and/or directories to scan when xcmsSet is NULL (default: working directory);
+  #             in that mode no class is known, so all the TICs are overlaid on a single page
+  #   pdfname : path of the PDF file to create
+  #   rt      : "raw" uses the scan times of each file, "corrected" uses xcmsSet@rt$corrected
+  #
+  # Returns (invisibly) the list of per-file matrices produced by getTIC().
+  cat("Creating TIC pdf...\n")
+
+  # Reduce the default c("raw","corrected") to a single value so that the test below is a scalar
+  rt <- match.arg(rt)
+
+  if (is.null(xcmsSet)) {
+    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+    filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
+    if (is.null(files))
+      files <- getwd()
+    info <- file.info(files)
+    listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
+    files <- c(files[!info$isdir], listed)
+    # No phenoData without an xcmsSet (reading xcmsSet@phenoData here used to abort the function)
+    class <- character(0)
+    classnames <- list()
+  } else {
+    files <- filepaths(xcmsSet)
+    class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
+    # For each class, the indices of the samples which belong to it
+    classnames<-lapply(class, function(cl) which( xcmsSet@phenoData[,1]==cl))
+  }
+
+  N <- length(files)
+  if (N == 0) stop("getTICs: no input file found")
+
+  TIC <- lapply(seq_len(N), function(i) {
+    rtcor <- if (!is.null(xcmsSet) && rt == "corrected") xcmsSet@rt$corrected[[i]] else NULL
+    getTIC(files[i],rtcor=rtcor)
+  })
+
+  pdf(pdfname,w=16,h=10)
+  # Make sure the PDF device is closed even when one of the plotting calls fails
+  pdfDevice <- dev.cur()
+  on.exit(dev.off(pdfDevice), add=TRUE)
+
+  cols <- rainbow(N)
+  lty = 1:N
+  pch = 1:N
+  #search for max x and max y in TICs
+  xlim = range(sapply(TIC, function(x) range(x[,1])))
+  ylim = range(sapply(TIC, function(x) range(x[,2])))
+  ylim = c(-ylim[2], ylim[2])
+
+  # Draw one page comparing class k (upwards) with class l (downwards).
+  # "vs" is the separator inserted between the two class names in the title.
+  plotPair <- function(k, l, vs) {
+    plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],vs,class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
+    colvect<-NULL
+    # Iterating over the indices themselves is safe when a class level has no sample
+    for (s in classnames[[k]]) {
+      tic <- TIC[[s]]
+      points(tic[,1]/60, tic[,2], col = cols[s], pch = pch[s], type="l")
+      colvect<-append(colvect,cols[s])
+    }
+    for (s in classnames[[l]]) {
+      tic <- TIC[[s]]
+      points(tic[,1]/60, -tic[,2], col = cols[s], pch = pch[s], type="l")
+      colvect<-append(colvect,cols[s])
+    }
+    legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
+  }
+
+  ##plot start
+  if (is.null(xcmsSet)) {
+    # files/directory mode: a single page with every TIC overlaid
+    plot(0, 0, type="n", xlim = xlim/60, ylim = c(0, ylim[2]), main = "Total Ion Chromatograms", xlab = "Retention Time (min)", ylab = "TIC")
+    for (s in seq_len(N)) {
+      tic <- TIC[[s]]
+      points(tic[,1]/60, tic[,2], col = cols[s], pch = pch[s], type="l")
+    }
+    legend("topright",paste(basename(files)), col = cols, lty = lty, pch = pch)
+  } else if (length(class)>2){
+    # every pair of classes gets its own page
+    for (k in 1:(length(class)-1)){
+      for (l in (k+1):length(class)){
+        plotPair(k, l, " vs ")
+      }
+    }
+  } else if (length(class)==2){
+    # the two-class title historically has no space around "vs"
+    plotPair(1, 2, "vs")
+  }
+
+  invisible(TIC)
+}
+
+
+
+##
+##  Get the polarities from all the samples of a condition
+#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
+getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
+  # Write the sampleMetadata table of an xcmsSet as a tab-separated file.
+  # The table holds the sample names (passed through make.names()), their class and, for files
+  # which are not netCDF, the polarity read from the raw file ("mixed" when several are found).
+  #
+  #   xcmsSet              : xcmsSet object; when NULL, a variable named "xset" visible from this
+  #                          function is used instead (historical behaviour)
+  #   sampleMetadataOutput : path of the tsv file to write
+  #
+  # Returns a list with the original sample names and their make.names() version.
+  # Stops when two samples end up with the same name after make.names().
+  cat("Creating the sampleMetadata file...\n")
+
+  # The argument used to be ignored in favour of the global "xset"; it is now honoured and the
+  # global is only a fallback for callers which do not pass any object.
+  if (is.null(xcmsSet)) {
+    if (!exists("xset")) stop("getSampleMetadata: no xcmsSet object provided")
+    xcmsSet <- get("xset")
+  }
+
+  #Create the sampleMetada dataframe
+  sampleMetadata=xcmsSet@phenoData
+  sampleNamesOrigin=rownames(sampleMetadata)
+  sampleNamesMakeNames=make.names(sampleNamesOrigin)
+
+  if (any(duplicated(sampleNamesMakeNames))) {
+    write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
+    for (sampleName in sampleNamesOrigin) {
+      write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
+    }
+    stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
+  }
+
+  if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
+    cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
+    for (sampleName in sampleNamesOrigin) {
+      cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n"))
+    }
+  }
+
+  sampleMetadata$sampleMetadata=sampleNamesMakeNames
+  sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns
+  rownames(sampleMetadata)=NULL
+
+  #For each sample file, the following actions are done
+  for (file in xcmsSet@filepaths){
+    #Check if the file is in the CDF format
+    if (!mzR:::netCDFIsFile(file)){
+
+      # If the column does not exist yet, add one filled with NA
+      if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA
+
+      #Create a simple xcmsRaw object for each sample
+      rawData=xcmsRaw(file)
+      #Verify if all the scans have the same polarity
+      uniq_list=unique(rawData@polarity)
+      if (length(uniq_list)>1){
+        polarity="mixed"
+      } else {
+        polarity=as.character(uniq_list)
+      }
+
+      #Keep only the sample name: file name without directory and extension
+      samplename=tools::file_path_sans_ext(basename(file))
+
+      #Set the polarity attribute. The column holds make.names() names, so the renamed form of
+      #the sample name is matched too; an empty polarity leaves the NA in place.
+      if (length(polarity) == 1) {
+        sampleMetadata$polarity[sampleMetadata$sampleMetadata %in% c(samplename, make.names(samplename))]=polarity
+      }
+
+      #Delete xcmsRaw object because it creates a bug for the fillpeaks step
+      rm(rawData)
+    }
+
+  }
+
+  write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput)
+
+  return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames))
+
+}
+
+
+##
+## This function checks whether xcms will find all the files
+##
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
+checkFilesCompatibilityWithXcms <- function(directory) {
+  # Compare the raw files selected with the file name pattern below through list.files()
+  # ("what xcms will find") with the raw files reported by the shell "find" command, and stop
+  # with the list of the files found on the filesystem only.
+  #   directory : path, relative to the working directory, of the files to check
+  cat("Checking files filenames compatibilities with xmcs...\n")
+  # WHAT XCMS WILL FIND
+  filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+  filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
+  info <- file.info(directory)
+  listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
+  files <- c(directory[!info$isdir], listed)
+  files_abs <- file.path(getwd(), files)
+  found <- file.exists(files_abs)
+  files[found] <- files_abs[found]
+  files[found] <- sub("//","/",files[found])
+
+  # WHAT IS ON THE FILESYSTEM
+  # The path is quoted for the shell: with an unquoted path containing a space, "find" failed
+  # and the comparison below silently passed on an empty list.
+  filesystem_filepaths=system(paste("find \"$PWD\"/",shQuote(directory)," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
+  filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
+
+  # COMPARISON
+  notImported <- filesystem_filepaths[!(filesystem_filepaths %in% files)]
+  if (length(notImported) > 0) {
+    write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
+    write(notImported,stderr())
+    stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
+
+  }
+}
+
+
+
+##
+## This function checks whether the XML files contain special characters. It also checks their integrity and completeness.
+##
+#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
+checkXmlStructure <- function (directory) {
+  # Run xmllint on the files found under "directory" and stop when at least one is reported.
+  #   directory : path handed as-is to the shell "find" command
+  cat("Checking XML structure...\n")
+
+  # Shell one-liner: "find" lists the regular files whose name matches '*.*ml*' (case-insensitive),
+  # skipping hidden files and anything under a path containing "conda-env"; each file is given to
+  # "xmllint --nonet --noout" and its name is echoed when the exit status is greater than 0.
+  # NOTE(review): the directory is not quoted for the shell - presumably paths with spaces are not expected here; confirm.
+  cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
+  # One element per file name echoed by the loop above
+  capture=system(cmd,intern=TRUE)
+
+  if (length(capture)>0){
+    #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
+    # The list of the rejected files goes to stderr before the script is stopped
+    write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
+    write(capture, stderr())
+    stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
+  }
+
+}
+
+
+##
+## This function checks whether the XML files contain non-ASCII characters and removes them
+##
+#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
+deleteXmlBadCharacters<- function (directory) {
+  # Look for characters outside the printable ASCII range in the '*.*ml*' files found under
+  # "directory" and, for each file where some are found, strip the non-ASCII characters in place.
+  #   directory : path handed to the shell "find" command
+  # Returns TRUE when at least one file has been rewritten, FALSE otherwise.
+  cat("Checking Non ASCII characters in the XML...\n")
+
+  processed=F
+  # Every path handed to the shell is quoted with shQuote(): the file name used to be quoted
+  # for grep but not for perl, so a name containing a space was detected but never cleaned.
+  l=system( paste("find",shQuote(directory), "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE)
+  for (i in l){
+    cmd=paste("LC_ALL=C grep '[^ -~]'",shQuote(i))
+    # grep exits with a non-zero status when nothing matches, which system() reports as a warning
+    capture=suppressWarnings(system(cmd,intern=TRUE))
+    if (length(capture)>0){
+      cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",shQuote(i))
+      print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
+      system(cmd,intern=TRUE)
+      processed=T
+    }
+  }
+  if (processed) cat("\n\n")
+  return(processed)
+}
+
+
+##
+## This function will compute MD5 checksum to check the data integrity
+##
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
+getMd5sum <- function (directory) {
+  # Compute the MD5 checksum of the raw files selected with the file name pattern below.
+  #   directory : files and/or directories; directories are scanned recursively
+  # Returns a one-column matrix of checksums whose row names are the file paths.
+  cat("Compute md5 checksum...\n")
+
+  # Same selection as the one used for "what xcms will find"
+  extensions <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+  filepattern <- paste0("\\.", extensions, "$", collapse = "|")
+
+  isDirectory <- file.info(directory)$isdir
+  candidates <- c(
+    directory[!isDirectory],
+    list.files(directory[isDirectory], pattern = filepattern, recursive = TRUE, full.names = TRUE)
+  )
+  # Only the files which really exist are hashed
+  candidates <- candidates[file.exists(candidates)]
+
+  library(tools)
+
+  checksums <- md5sum(candidates)
+  return(as.matrix(checksums))
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Apr 08 10:39:32 2016 -0400
@@ -0,0 +1,51 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.1.2">R</requirement>
+	    <requirement type="package" version="0.4_1">r-snow</requirement>
+            <requirement type="package" version="1.44.0">bioconductor-xcms</requirement>
+	    <requirement type="package" version="1.1_4">r-batch</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <!-- Open-ended range: any non-zero exit status, not only 1, marks the job as failed
+                 (this is the range the inline stdio block of the tool used before the macro) -->
+            <exit_code range="1:" level="fatal" />
+        </stdio>
+    </xml>
+
+    <token name="@COMMAND_XCMS_SCRIPT@">
+        LANG=C Rscript $__tool_directory__/xcms.r
+    </token>
+
+    <token name="@COMMAND_LOG_EXIT@">
+        ;
+        return=\$?;
+        mv log.txt $log;
+        cat $log;
+        sh -c "exit \$return"
+    </token>
+
+    <token name="@HELP_AUTHORS@">
+.. class:: infomark
+
+**Authors**  Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu
+
+.. class:: infomark
+
+**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M]
+
+ | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.
+
+---------------------------------------------------
+
+    </token>
+
+
+    <xml name="citation">
+        <citations>
+            <citation type="doi">10.1021/ac051437y</citation>
+            <citation type="doi">10.1093/bioinformatics/btu813</citation>
+        </citations>
+    </xml>
+</macros>
--- a/planemo.sh	Mon Feb 22 16:38:15 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-planemo shed_init -f --name=xcms_retcor --owner=lecorguille --description="[W4M][GC-MS] XCMS R Package - Preprocessing - Correct retention time from different samples" --homepage_url="http://workflow4metabolomics.org" --long_description="Part of the W4M project: http://workflow4metabolomics.org\n\nXCMS: http://www.bioconductor.org/packages/release/bioc/html/xcms.html\n\nRetention Time Correction using retcor function from xcms R package\n\nBEWARE: this tool don't come with its script. You will need to install the dedicated package_xcms_w4m_script too" --category="Metabolomics"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/planemo_test.sh	Fri Apr 08 10:39:32 2016 -0400
@@ -0,0 +1,19 @@
+planemo conda_init
+planemo conda_install .
+planemo test --install_galaxy --conda_dependency_resolution --galaxy_branch "dev"
+
+#All 1 test(s) executed passed.
+#abims_xcms_retcor[0]: passed
+
+
+source /w/galaxy/dev/shed_tools_tool_dependency_dir/R/3.1.2/iuc/package_r_3_1_2/1ca39eb16186/env.sh
+source /w/galaxy/dev/shed_tools_tool_dependency_dir/bioconductor-xcms/1.44.0/lecorguille/package_bioconductor_xcms_1_44_0/0c38f7d43e08/env.sh
+planemo test --install_galaxy --galaxy_branch "dev"
+
+#All 1 test(s) executed passed.
+#abims_xcms_retcor[0]: passed
+
+
+planemo shed_test --install_galaxy --galaxy_branch "dev" -t testtoolshed
+#All 1 test(s) executed passed.
+#testtoolshed.g2.bx.psu.edu/repos/lecorguille/xcms_retcor/abims_xcms_retcor/2.0.6[0]: passed
\ No newline at end of file
--- a/repository_dependencies.xml	Mon Feb 22 16:38:15 2016 -0500
+++ b/repository_dependencies.xml	Fri Apr 08 10:39:32 2016 -0400
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
 <repositories>
     <repository changeset_revision="7800ba9a4c1e" name="no_unzip_datatype" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" />
-    <repository changeset_revision="d64562a4ebb3" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" />
+	<repository changeset_revision="d64562a4ebb3" name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" />
 </repositories>
Binary file test-data/sacuri.zip has changed
Binary file test-data/sacuri_dir_root.zip has changed
Binary file test-data/xset.group.RData has changed
Binary file test-data/xset.group.retcor.BPCs_corrected.pdf has changed
Binary file test-data/xset.group.retcor.RData has changed
Binary file test-data/xset.group.retcor.Rplots.pdf has changed
Binary file test-data/xset.group.retcor.TICs_corrected.pdf has changed
--- a/tool_dependencies.xml	Mon Feb 22 16:38:15 2016 -0500
+++ b/tool_dependencies.xml	Fri Apr 08 10:39:32 2016 -0400
@@ -1,12 +1,9 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="R" version="3.1.2">
-        <repository changeset_revision="c987143177d4" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="4d2fd1413b56" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-    <package name="xcms" version="1.44.0">
-        <repository changeset_revision="4443617bdd85" name="package_r_xcms_1_44_0" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="xcms_w4m_script" version="2.2.0">
-        <repository changeset_revision="115cf2b43a3c" name="package_xcms_w4m_script_2_2_0" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="bioconductor-xcms" version="1.44.0">
+        <repository changeset_revision="58ebb405a3d6" name="package_bioconductor_xcms_1_44_0" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms.r	Fri Apr 08 10:39:32 2016 -0400
@@ -0,0 +1,246 @@
+#!/usr/bin/env Rscript
+# xcms.r version="2.2.0"
+#Authors ABIMS TEAM
+#BPC Addition from Y.guitton
+
+
+# ----- LOG FILE -----
+log_file=file("log.txt", open = "wt")
+sink(log_file)
+sink(log_file, type = "output")
+
+
+# ----- PACKAGE -----
+cat("\tPACKAGE INFO\n")
+#pkgs=c("xcms","batch")
+pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch")
+for(pkg in pkgs) {
+  suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
+  cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="")
+}
+source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) }
+cat("\n\n");
+
+
+
+
+
+# ----- ARGUMENTS -----
+cat("\tARGUMENTS INFO\n")
+listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
+write.table(as.matrix(listArguments), col.names=F, quote=F, sep='\t')
+
+cat("\n\n");
+
+
+# ----- ARGUMENTS PROCESSING -----
+cat("\tINFILE PROCESSING INFO\n")
+
+#image is an .RData file necessary to use xset variable given by previous tools
+if (!is.null(listArguments[["image"]])){
+  load(listArguments[["image"]]); listArguments[["image"]]=NULL
+}
+
+#Import the different functions
+source_local("lib.r")
+
+cat("\n\n")
+
+#Import the different functions
+
+# ----- PROCESSING INFILE -----
+cat("\tARGUMENTS PROCESSING INFO\n")
+
+# Save arguments to generate a report
+if (!exists("listOFlistArguments")) listOFlistArguments=list()
+listOFlistArguments[[paste(format(Sys.time(), "%y%m%d-%H:%M:%S_"),listArguments[["xfunction"]],sep="")]] = listArguments
+
+
+#saving the common parameters
+thefunction = listArguments[["xfunction"]]; listArguments[["xfunction"]]=NULL #delete from the list of arguments
+
+xsetRdataOutput = paste(thefunction,"RData",sep=".")
+if (!is.null(listArguments[["xsetRdataOutput"]])){
+  xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL
+}
+
+rplotspdf = "Rplots.pdf"
+if (!is.null(listArguments[["rplotspdf"]])){
+  rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL
+}
+
+sampleMetadataOutput = "sampleMetadata.tsv"
+if (!is.null(listArguments[["sampleMetadataOutput"]])){
+  sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL
+}
+
+
+
+
+if (thefunction %in% c("xcmsSet","retcor")) {
+  ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL
+  bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL
+}
+
+#A .zip file uploaded to Galaxy has to be unzipped before it can be used.
+#A .zip archive makes it possible to upload many files at the same time while preserving the directory tree.
+
+
+if (!is.null(listArguments[["zipfile"]])){
+  zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL
+}
+
+if (!is.null(listArguments[["library"]])){
+  directory=listArguments[["library"]]; listArguments[["library"]]=NULL
+  if(!file.exists(directory)){
+    error_message=paste("Cannot access the directory:",directory,". Please verify if the directory exists or not.")
+    print(error_message)
+    stop(error_message)
+  }
+}
+
+# We unzip automatically the chromatograms from the zip files.
+if (thefunction %in% c("xcmsSet","retcor","fillPeaks"))  {
+  # "zipfile" exists when it was given on the command line (see above)
+  # NOTE(review): it may presumably also come from the loaded image, since "zipfile" is one of the saved objects - confirm.
+  if(exists("zipfile") && (zipfile!="")) {
+    if(!file.exists(zipfile)){
+      error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
+      # The message is printed (stdout is sunk to log.txt at the top of the script) before stopping
+      print(error_message)
+      stop(error_message)
+    }
+
+    #list all the files in the zip file
+    #zip_files=unzip(zipfile,list=T)[,"Name"]
+
+
+    #unzip into the working directory, using the external "unzip" program
+    suppressWarnings(unzip(zipfile, unzip="unzip"))
+
+    #get the directory name: first path component of each archive entry, keeping only the
+    #ones which are directories on disk and ignoring the "__MACOSX" folder
+    filesInZip=unzip(zipfile, list=T);
+    directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])));
+    directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
+    # A single top-level directory becomes the data root; otherwise the working directory is used
+    directory = "."
+    if (length(directories) == 1) directory = directories
+
+    cat("files_root_directory\t",directory,"\n")
+
+    # MD5 checksums of the extracted raw files, stored under "origin"
+    md5sumList=list("origin"=getMd5sum(directory))
+
+    # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files.
+    # Disabled because it can create issues with some clean files
+    #@TODO: fix me
+    #if (deleteXmlBadCharacters(directory)) {
+    #  md5sumList=list("removalBadCharacters"=getMd5sum(directory))
+    #}
+
+  }
+}
+
+#addition of the directory to the list of arguments in the first position
+if (thefunction == "xcmsSet") {
+  # "directory" is only defined above when a zip file or a library path was given;
+  # fail with an explicit message instead of an "object not found" error.
+  if (!exists("directory")) {
+    stop("No input files: neither a zip file nor a library directory was provided to xcmsSet")
+  }
+  # Both checks stop the script when a file is rejected
+  checkXmlStructure(directory)
+  checkFilesCompatibilityWithXcms(directory)
+  # The directory becomes the first argument of the xcmsSet call
+  listArguments=append(directory, listArguments)
+}
+
+
+#addition of xset object to the list of arguments in the first position
+if (exists("xset")){
+  listArguments=append(list(xset), listArguments)
+}
+
+cat("\n\n")
+
+
+
+
+
+
+# ----- MAIN PROCESSING INFO -----
+cat("\tMAIN PROCESSING INFO\n")
+
+
+#Verification of a group step before doing the fillpeaks job.
+
+if (thefunction == "fillPeaks") {
+  # groupnames() fails on an xcmsSet without groups; try() turns that failure into a "try-error" object
+  res=try(is.null(groupnames(xset)))
+  # inherits() is the robust way to detect a try-error (class() may return several values)
+  if (inherits(res, "try-error")){
+    error<-geterrmessage()
+    write(error, stderr())
+    stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step")
+  }
+
+}
+
+#change the default display settings
+#dev.new(file="Rplots.pdf", width=16, height=12)
+pdf(file=rplotspdf, width=16, height=12)
+if (thefunction == "group") {
+  par(mfrow=c(2,2))
+}
+#else if (thefunction == "retcor") {
+#try to change the legend display
+#     par(xpd=NA)
+#     par(xpd=T, mar=par()$mar+c(0,0,0,4))
+#}
+
+
+#execution of the function "thefunction" with the parameters given in "listArguments"
+xset = do.call(thefunction, listArguments)
+
+
+cat("\n\n")
+
+dev.off() #dev.new(file="Rplots.pdf", width=16, height=12)
+
+if (thefunction  == "xcmsSet") {
+
+  #transform the files absolute pathways into relative pathways
+  xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths)
+
+  if(exists("zipfile") && (zipfile!="")) {
+
+    #Modify the samples names (erase the path)
+    for(i in 1:length(sampnames(xset))){
+
+      sample_name=unlist(strsplit(sampnames(xset)[i], "/"))
+      sample_name=sample_name[length(sample_name)]
+      sample_name= unlist(strsplit(sample_name,"[.]"))[1]
+      sampnames(xset)[i]=sample_name
+
+    }
+
+  }
+
+}
+
+# -- TIC --
+if (thefunction == "xcmsSet") {
+  sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput)
+  getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw")
+  getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf)
+} else if (thefunction == "retcor") {
+  getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected")
+  getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf)
+}
+
+cat("\n\n")
+
+
+# ----- EXPORT -----
+
+cat("\tXSET OBJECT INFO\n")
+print(xset)
+#delete the parameters to avoid the passage to the next tool in .RData image
+
+
+#saving R data in .Rdata file to save the variables used in the present tool
+objects2save = c("xset","zipfile","listOFlistArguments","md5sumList","sampleNamesList")
+save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput)
+
+cat("\n\n")
+
+
+cat("\tDONE\n")
+