Mercurial > repos > lecorguille > xcms_xcmsset

--- a/README.rst	Tue Jan 07 08:16:02 2020 -0500
+++ b/README.rst	Wed Feb 12 08:30:58 2020 -0500
@@ -24,6 +24,10 @@

 .. _News: https://bioconductor.org/packages/release/bioc/news/xcms/NEWS

+**Version 3.6.1.0 - 03/09/2019**
+
+- UPGRADE: upgrade the xcms version from 3.4.4 to 3.6.1 (see XCMS News_)
+
 **Version 3.4.4.1 - 30/04/2019**

 - BUGFIX: remove the pre-compute of the chromatograms which was memory consuming. Now, only xcms plot chromatogram will generate the Chromatograms.
--- a/abims_xcms_xcmsSet.xml	Tue Jan 07 08:16:02 2020 -0500
+++ b/abims_xcms_xcmsSet.xml	Wed Feb 12 08:30:58 2020 -0500
@@ -1,4 +1,4 @@
-<tool id="abims_xcms_xcmsSet" name="xcms findChromPeaks (xcmsSet)" version="@WRAPPER_VERSION@.2">
+<tool id="abims_xcms_xcmsSet" name="xcms findChromPeaks (xcmsSet)" version="@TOOL_VERSION@+galaxy0">
     <description>Chromatographic peak detection</description>

     <macros>
@@ -224,7 +224,7 @@
                 <param name="ppm" value="25" />
                 <param name="peakwidth" value="20,50" />
             </conditional>
-            <expand macro="test_file_load_single_ko15"/>
+            <expand macro="test_file_load_single_file" filename="ko15"/>
             <assert_stdout>
                 <has_text text="ppm: 25" />
                 <has_text text="peakwidth: 20, 50" />
@@ -235,6 +235,7 @@
                 <has_text text="Peak Groups: 0" />
                 <has_text text="Sample classes: ." />
             </assert_stdout>
+            <!--<output name="xsetRData" file="ko15-xset.RData" />-->
         </test>
         <!-- DISABLE FOR TRAVIS
         Useful to generate test-data for the further steps
@@ -245,6 +246,7 @@
                 <param name="ppm" value="25" />
                 <param name="peakwidth" value="20,50" />
             </conditional>
+            <expand macro="test_file_load_single_file" filename="ko16"/>
             <assert_stdout>
                 <has_text text="object with 1 samples" />
                 <has_text text="Time range: 2521.7-4477.9 seconds (42-74.6 minutes)" />
@@ -253,6 +255,7 @@
                 <has_text text="Peak Groups: 0" />
                 <has_text text="Sample classes: ." />
             </assert_stdout>
+            <output name="xsetRData" file="ko16-xset.RData" />
         </test>
         <test>
             <param name="image" value="wt15-raw.RData" ftype="rdata" />
@@ -261,6 +264,7 @@
                 <param name="ppm" value="25" />
                 <param name="peakwidth" value="20,50" />
             </conditional>
+            <expand macro="test_file_load_single_file" filename="wt15"/>
             <assert_stdout>
                 <has_text text="object with 1 samples" />
                 <has_text text="Time range: 2517-4473.2 seconds (42-74.6 minutes)" />
@@ -269,6 +273,7 @@
                 <has_text text="Peak Groups: 0" />
                 <has_text text="Sample classes: ." />
             </assert_stdout>
+            <output name="xsetRData" file="wt15-xset.RData" />
         </test>
         <test>
             <param name="image" value="wt16-raw.RData" ftype="rdata" />
@@ -277,6 +282,7 @@
                 <param name="ppm" value="25" />
                 <param name="peakwidth" value="20,50" />
             </conditional>
+            <expand macro="test_file_load_single_file" filename="wt16"/>
             <assert_stdout>
                 <has_text text="object with 1 samples" />
                 <has_text text="Time range: 2521.7-4468.5 seconds (42-74.5 minutes)" />
@@ -285,6 +291,7 @@
                 <has_text text="Peak Groups: 0" />
                 <has_text text="Sample classes: ." />
             </assert_stdout>
+            <output name="xsetRData" file="wt16-xset.RData" />
         </test>
         -->
         <!-- DISABLE FOR TRAVIS
@@ -485,14 +492,13 @@

 .. _News: https://bioconductor.org/packages/release/bioc/news/xcms/NEWS

+@HELP_XCMS_NEWVERSION_3610@
+
 **Version 3.4.4.1 - 30/04/2019**

 - BUGFIX: remove the pre-compute of the chromatograms which was memory consuming. Now, only xcms plot chromatogram will generate the Chromatograms.

-**Version 3.4.4.0 - 08/02/2019**
-
-- UPGRADE: upgrade the xcms version from 3.0.0 to 3.4.4 (see XCMS News_)
-
+@HELP_XCMS_NEWVERSION_3440@

 **Version 3.0.0.0 - 08/03/2018**
--- a/lib.r	Tue Jan 07 08:16:02 2020 -0500
+++ b/lib.r	Wed Feb 12 08:30:58 2020 -0500
@@ -42,16 +42,16 @@
     chromBPI <- NULL
     chromTIC_adjusted <- NULL
     chromBPI_adjusted <- NULL
+    md5sumList <- NULL
     for(image in args$images) {

         load(image)
         # Handle infiles
         if (!exists("singlefile")) singlefile <- NULL
         if (!exists("zipfile")) zipfile <- NULL
-        rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
+        rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args)
         zipfile <- rawFilePath$zipfile
         singlefile <- rawFilePath$singlefile
-        retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)

         if (exists("raw_data")) xdata <- raw_data
         if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
@@ -149,7 +149,7 @@

     par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))

-    group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
+    group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
     names(group_colors) <- unique(xdata$sample_group)

     xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
@@ -170,7 +170,7 @@
     pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12)

     # Color by group
-    group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
+    group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
     if (length(group_colors) > 1) {
         names(group_colors) <- unique(xdata$sample_group)
         plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
@@ -239,15 +239,15 @@
     pdf(pdfname, width=16, height=10)

     # Color by group
-    group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
+    group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
     if (length(group_colors) > 1) {
         names(group_colors) <- unique(xdata$sample_group)
-        plot(chrom, col = group_colors[chrom$sample_group], main=main)
+        plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none")
         legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
     }

     # Color by sample
-    plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main)
+    plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none")
     legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)

     dev.off()
@@ -317,142 +317,43 @@
 }


-# This function check if xcms will found all the files
-#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
-checkFilesCompatibilityWithXcms <- function(directory) {
-    cat("Checking files filenames compatibilities with xmcs...\n")
-    # WHAT XCMS WILL FIND
-    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-    filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
-    info <- file.info(directory)
-    listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
-    files <- c(directory[!info$isdir], listed)
-    files_abs <- file.path(getwd(), files)
-    exists <- file.exists(files_abs)
-    files[exists] <- files_abs[exists]
-    files[exists] <- sub("//","/",files[exists])
-
-    # WHAT IS ON THE FILESYSTEM
-    filesystem_filepaths <- system(paste0("find \"",getwd(),"/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T)
-    filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
-
-    # COMPARISON
-    if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
-        write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
-        write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
-        stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
-    }
-}
-
-
-#This function list the compatible files within the directory as xcms did
-#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
-getMSFiles <- function (directory) {
-    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-    filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
-    info <- file.info(directory)
-    listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
-    files <- c(directory[!info$isdir], listed)
-    exists <- file.exists(files)
-    files <- files[exists]
-    return(files)
-}
-
-# This function check if XML contains special caracters. It also checks integrity and completness.
-#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
-checkXmlStructure <- function (directory) {
-    cat("Checking XML structure...\n")
-
-    cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
-    capture <- system(cmd, intern=TRUE)
-
-    if (length(capture)>0){
-        #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
-        write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
-        write(capture, stderr())
-        stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
-    }
-
-}
-
-
-# This function check if XML contain special characters
-#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
-deleteXmlBadCharacters<- function (directory) {
-    cat("Checking Non ASCII characters in the XML...\n")
-
-    processed <- F
-    l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE)
-    for (i in l){
-        cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="")
-        capture <- suppressWarnings(system(cmd, intern=TRUE))
-        if (length(capture)>0){
-            cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
-            print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
-            c <- system(cmd, intern=TRUE)
-            capture <- ""
-            processed <- T
-        }
-    }
-    if (processed) cat("\n\n")
-    return(processed)
-}
-
-
 # This function will compute MD5 checksum to check the data integrity
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
-getMd5sum <- function (directory) {
+getMd5sum <- function (files) {
     cat("Compute md5 checksum...\n")
-    # WHAT XCMS WILL FIND
-    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
-    filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
-    info <- file.info(directory)
-    listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
-    files <- c(directory[!info$isdir], listed)
-    exists <- file.exists(files)
-    files <- files[exists]
-
     library(tools)
-
-    #cat("\n\n")
-
     return(as.matrix(md5sum(files)))
 }

-
-# This function get the raw file path from the arguments
-#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
-getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") {
-  if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
-
-  if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]]
-
-  if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
-    singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]]
-    singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]]
-  }
-  if (exists("singlefile_galaxyPaths")){
-    singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|"))
-    singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|"))
-
-    singlefile <- NULL
-    for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
-      singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
-      singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
-      # In case, an url is used to import data within Galaxy
-      singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
-      singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
-    }
-  }
-  return(list(zipfile=zipfile, singlefile=singlefile))
-}
-
 # This function retrieve the raw file in the working directory
 #   - if zipfile: unzip the file with its directory tree
 #   - if singlefiles: set symlink with the good filename
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
-retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
+retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") {
+
+    if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
+
+    # single - if the file are passed in the command arguments -> refresh singlefile
+    if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
+      singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|"))
+      singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|"))
+
+      singlefile <- NULL
+      for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
+        singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
+        singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
+        # In case, an url is used to import data within Galaxy
+        singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
+        singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
+      }
+    }
+    # zipfile - if the file are passed in the command arguments -> refresh zipfile
+    if (!is.null(args[[paste0("zipfile",prefix)]]))
+      zipfile <- args[[paste0("zipfile",prefix)]]
+
+    # single
     if(!is.null(singlefile) && (length("singlefile")>0)) {
+        files <- vector()
         for (singlefile_sampleName in names(singlefile)) {
             singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
             if(!file.exists(singlefile_galaxyPath)){
@@ -462,22 +363,16 @@

             if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
                 file.copy(singlefile_galaxyPath, singlefile_sampleName)
-
+            files <- c(files, singlefile_sampleName)
         }
-        directory <- "."
-
     }
+    # zipfile
     if(!is.null(zipfile) && (zipfile != "")) {
         if(!file.exists(zipfile)){
             error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
             print(error_message)
             stop(error_message)
         }
-
-        #list all file in the zip file
-        #zip_files <- unzip(zipfile,list=T)[,"Name"]
-
-        #unzip
         suppressWarnings(unzip(zipfile, unzip="unzip"))

         #get the directory name
@@ -489,8 +384,17 @@

         cat("files_root_directory\t",directory,"\n")

+        filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
+        filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
+        info <- file.info(directory)
+        listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
+        files <- c(directory[!info$isdir], listed)
+        exists <- file.exists(files)
+        files <- files[exists]
+
     }
-    return (directory)
+    return(list(zipfile=zipfile, singlefile=singlefile, files=files))
+
 }
--- a/macros_xcms.xml	Tue Jan 07 08:16:02 2020 -0500
+++ b/macros_xcms.xml	Wed Feb 12 08:30:58 2020 -0500
@@ -1,11 +1,11 @@
 <?xml version="1.0"?>
 <macros>

-    <token name="@WRAPPER_VERSION@">3.4.4</token>
+    <token name="@TOOL_VERSION@">3.6.1</token>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="@WRAPPER_VERSION@">bioconductor-xcms</requirement>
-            <requirement type="package" version="1.1_4">r-batch</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">bioconductor-xcms</requirement>
+            <requirement type="package" version="1.1_5">r-batch</requirement>
             <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
             <requirement type="package" version="6.0">unzip</requirement>
             <yield />
@@ -69,11 +69,11 @@
         </section>
     </xml>

-    <xml name="test_file_load_single_ko15">
+    <xml name="test_file_load_single_file" token_filename="">
         <section name="file_load_section">
             <conditional name="file_load_conditional">
                 <param name="file_load_select" value="yes" />
-                <param name="input" value="ko15.CDF" ftype="netcdf" />
+                <param name="input" value="@FILENAME@.CDF" ftype="netcdf" />
             </conditional>
         </section>
     </xml>
@@ -246,6 +246,17 @@

     </token>

+    <token name="@HELP_XCMS_NEWVERSION_3440@">
+**Version 3.4.4.0 - 08/02/2019**
+
+- UPGRADE: upgrade the xcms version from 3.0.0 to 3.4.4 (see XCMS News_)
+    </token>
+    <token name="@HELP_XCMS_NEWVERSION_3610@">
+**Version 3.6.1+galaxy* - 03/09/2019**
+
+- UPGRADE: upgrade the xcms version from 3.4.4 to 3.6.1 (see XCMS News_)
+    </token>
+
     <xml name="citation">
         <citations>
             <citation type="doi">10.1021/ac051437y</citation>
--- a/xcms_xcmsSet.r	Tue Jan 07 08:16:02 2020 -0500
+++ b/xcms_xcmsSet.r	Wed Feb 12 08:30:58 2020 -0500
@@ -59,12 +59,9 @@
 if (!exists("raw_data")) stop("\n\nERROR: The RData doesn't contain any object called 'raw_data' which is provided by the tool: MSnbase readMSData")

 # Handle infiles
-if (!exists("singlefile")) singlefile <- NULL
-if (!exists("zipfile")) zipfile <- NULL
-rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
+rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args)
 zipfile <- rawFilePath$zipfile
 singlefile <- rawFilePath$singlefile
-directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)


 cat("\n\n")
@@ -76,9 +73,6 @@

 cat("\t\tCOMPUTE\n")

-## Get the full path to the files
-files <- getMSFiles(directory)
-
 cat("\t\t\tApply filter[s] (if asked)\n")
 if (exists("filterAcquisitionNumParam"))  raw_data <- filterAcquisitionNum(raw_data, filterAcquisitionNumParam[1]:filterAcquisitionNumParam[2])
 if (exists("filterRtParam")) raw_data <- filterRt(raw_data, filterRtParam)
@@ -99,9 +93,6 @@
 # Check if there are no peaks
 if (nrow(chromPeaks(xdata)) == 0) stop("No peaks were detected. You should review your settings")

-# Transform the files absolute pathways into relative pathways
-xdata@processingData@files <- sub(paste(getwd(), "/", sep="") , "", xdata@processingData@files)
-
 # Create a sampleMetada file
 sampleNamesList <- getSampleMetadata(xdata=xdata, sampleMetadataOutput="sampleMetadata.tsv")