diff lib.r @ 2:8e5f667359cb draft default tip

planemo upload for repository https://github.com/rietho/IPO commit d25c744220e416cce158161fa7dc3b0f153a5c11
author workflow4metabolomics
date Mon, 11 Sep 2023 22:37:32 +0000
parents ae8de756dfcf
children
line wrap: on
line diff
--- a/lib.r	Mon Dec 16 05:26:42 2019 -0500
+++ b/lib.r	Mon Sep 11 22:37:32 2023 +0000
@@ -1,12 +1,12 @@
 #@author G. Le Corguille
 # solve an issue with batch if arguments are logical TRUE/FALSE
 parseCommandArgs <- function(...) {
-    args <- batch::parseCommandArgs(...)
-    for (key in names(args)) {
-        if (args[key] %in% c("TRUE","FALSE"))
-            args[key] = as.logical(args[key])
-    }
-    return(args)
+  args <- batch::parseCommandArgs(...)
+  for (key in names(args)) {
+    if (args[key] %in% c("TRUE", "FALSE"))
+      args[key] <- as.logical(args[key])
+  }
+  return(args)
 }
 
 #@author G. Le Corguille
@@ -14,95 +14,99 @@
 # - load the packages
 # - display the sessionInfo
 loadAndDisplayPackages <- function(pkgs) {
-    for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
-    sessioninfo = sessionInfo()
-    cat(sessioninfo$R.version$version.string,"\n")
-    cat("Main packages:\n")
-    for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
-    cat("Other loaded packages:\n")
-    for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
+  for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE)))
+  sessioninfo <- sessionInfo()
+  cat(sessioninfo$R.version$version.string, "\n")
+  cat("Main packages:\n")
+  for (pkg in names(sessioninfo$otherPkgs)) {
+    cat(paste(pkg, packageVersion(pkg)), "\t")
+  }
+  cat("\n")
+  cat("Other loaded packages:\n")
+  for (pkg in names(sessioninfo$loadedOnly)) {
+    cat(paste(pkg, packageVersion(pkg)), "\t")
+  }
+  cat("\n")
 }
 
 ##
 ## This function launch IPO functions to get the best parameters for xcmsSet
 ## A sample among the whole dataset is used to save time
 ##
-ipo4xcmsSet = function(directory, parametersOutput, args, samplebyclass=4) {
-    setwd(directory)
+ipo4xcmsSet <- function(directory, parametersOutput, args, samplebyclass = 4) {
+  setwd(directory)
 
-    files = list.files(".", recursive=T)  # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
-    files = files[!files %in% c("conda_activate.log", "log.txt")]
-    files_classes = basename(dirname(files))    # "KO", "KO", "WT", "WT"
+  files <- list.files(".", recursive = TRUE)  # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
+  files <- files[!files %in% c("conda_activate.log", "log.txt")]
+  files_classes <- basename(dirname(files))    # "KO", "KO", "WT", "WT"
 
-    mzmlfile = files
-    if (samplebyclass > 0) {
-        #random selection of N files for IPO in each class
-        classes<-unique(basename(dirname(files)))
-        mzmlfile = NULL
-        for (class_i in classes){
-            files_class_i = files[files_classes==class_i]
-            if (samplebyclass > length(files_class_i)) {
-                mzmlfile = c(mzmlfile, files_class_i)
-            } else {
-                mzmlfile = c(mzmlfile,sample(files_class_i,samplebyclass))
-            }
-        }
+  mzmlfile <- files
+  if (samplebyclass > 0) {
+    #random selection of N files for IPO in each class
+    classes <- unique(basename(dirname(files)))
+    mzmlfile <- NULL
+    for (class_i in classes){
+      files_class_i <- files[files_classes == class_i]
+      if (samplebyclass > length(files_class_i)) {
+        mzmlfile <- c(mzmlfile, files_class_i)
+      } else {
+        mzmlfile <- c(mzmlfile, sample(files_class_i, samplebyclass))
+      }
     }
-    #@TODO: else, must we keep the RData to been use directly by group?
+  }
+  #@TODO: else, must we keep the RData to been use directly by group?
 
-    cat("\t\tSamples used:\n")
-    print(mzmlfile)
+  cat("\t\tSamples used:\n")
+  print(mzmlfile)
 
-    peakpickingParameters = getDefaultXcmsSetStartingParams(args$method) #get default parameters of IPO
+  peakpickingParameters <- getDefaultXcmsSetStartingParams(args$method) #get default parameters of IPO
 
-    # filter args to only get releavant parameters and complete with those that are not declared
-    peakpickingParametersUser = c(args[names(args) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(args))])
-    peakpickingParametersUser$verbose.columns = TRUE
-    #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement
-    resultPeakpicking = optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO
+  # filter args to only get releavant parameters and complete with those that are not declared
+  peakpickingParametersUser <- c(args[names(args) %in% names(peakpickingParameters)], peakpickingParameters[!(names(peakpickingParameters) %in% names(args))])
+  peakpickingParametersUser$verbose.columns <- TRUE
+  resultPeakpicking <- optimizeXcmsSet(mzmlfile, peakpickingParametersUser, nSlaves = args$nSlaves, subdir = "./IPO_results") #some images generated by IPO
 
-    # export
-    resultPeakpicking_best_settings_parameters = resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves","verbose.columns"))]
-    write.table(t(as.data.frame(resultPeakpicking_best_settings_parameters)), file=parametersOutput,  sep="\t", row.names=T, col.names=F, quote=F)  #can be read by user
+  # export
+  peakpicking_best_params <- resultPeakpicking$best_settings$parameters[!(names(resultPeakpicking$best_settings$parameters) %in% c("nSlaves", "verbose.columns"))]
+  write.table(t(as.data.frame(peakpicking_best_params)), file = parametersOutput,  sep = "\t", row.names = TRUE, col.names = FALSE, quote = FALSE)  #can be read by user
 
-    return (resultPeakpicking$best_settings$xset)
+  return(resultPeakpicking$best_settings$xset)
 }
 
 ##
 ## This function launch IPO functions to get the best parameters for group and retcor
 ##
-ipo4retgroup = function(xset, directory, parametersOutput, args, samplebyclass=4) {
-    setwd(directory)
+ipo4retgroup <- function(xset, directory, parametersOutput, args, samplebyclass = 4) {
+  setwd(directory)
 
-    files = list.files(".", recursive=T)  # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
-    files = files[!files %in% c("conda_activate.log", "log.txt")]
-    files_classes = basename(dirname(files))    # "KO", "KO", "WT", "WT"
+  files <- list.files(".", recursive = TRUE)  # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
+  files <- files[!files %in% c("conda_activate.log", "log.txt")]
+  files_classes <- basename(dirname(files))    # "KO", "KO", "WT", "WT"
 
-    retcorGroupParameters = getDefaultRetGroupStartingParams(args$retcorMethod) #get default parameters of IPO
-    print(retcorGroupParameters)
-    # filter args to only get releavant parameters and complete with those that are not declared
-    retcorGroupParametersUser = c(args[names(args) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(args))])
-    print("retcorGroupParametersUser")
-    print(retcorGroupParametersUser)
-    resultRetcorGroup = optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves=args$nSlaves, subdir="../IPO_results") #some images generated by IPO
+  retcorGroupParameters <- getDefaultRetGroupStartingParams(args$retcorMethod) #get default parameters of IPO
+  print(retcorGroupParameters)
+  # filter args to only get releavant parameters and complete with those that are not declared
+  retcorGroupParametersUser <- c(args[names(args) %in% names(retcorGroupParameters)], retcorGroupParameters[!(names(retcorGroupParameters) %in% names(args))])
+  print("retcorGroupParametersUser")
+  print(retcorGroupParametersUser)
+  resultRetcorGroup <- optimizeRetGroup(xset, retcorGroupParametersUser, nSlaves = args$nSlaves, subdir = "../IPO_results") #some images generated by IPO
 
-    # export
-    resultRetcorGroup_best_settings_parameters = resultRetcorGroup$best_settings
-    write.table(t(as.data.frame(resultRetcorGroup_best_settings_parameters)), file=parametersOutput,  sep="\t", row.names=T, col.names=F, quote=F)  #can be read by user
+  # export
+  resultRetcorGroup_best_params <- resultRetcorGroup$best_settings
+  write.table(t(as.data.frame(resultRetcorGroup_best_params)), file = parametersOutput,  sep = "\t", row.names = TRUE, col.names = FALSE, quote = FALSE)  #can be read by user
 }
 
 
 
 # This function check if XML contains special caracters. It also checks integrity and completness.
 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
-checkXmlStructure <- function (directory) {
+checkXmlStructure <- function(directory) {
   cat("Checking XML structure...\n")
 
-  cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
-  capture <- system(cmd, intern=TRUE)
+  cmd <- paste0("IFS=$'\n'; for xml in $(find '", directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
+  capture <- system(cmd, intern = TRUE)
 
-  if (length(capture)>0){
-    #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
+  if (length(capture) > 0) {
     write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
     write(capture, stderr())
     stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
@@ -113,92 +117,94 @@
 
 # This function get the raw file path from the arguments
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
-getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") {
-  if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
+getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix = "") {
+  if (!(prefix %in% c("", "Positive", "Negative", "MS1", "MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
 
-  if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]]
+  if (!is.null(args[[paste0("zipfile", prefix)]])) zipfile <- args[[paste0("zipfile", prefix)]]
 
-  if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
-    singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]]
-    singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]]
+  if (!is.null(args[[paste0("singlefile_galaxyPath", prefix)]])) {
+    singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath", prefix)]]
+    singlefile_sampleNames <- args[[paste0("singlefile_sampleName", prefix)]]
   }
-  if (exists("singlefile_galaxyPaths")){
-    singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|"))
-    singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|"))
+  if (exists("singlefile_galaxyPaths")) {
+    singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths, "\\|"))
+    singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames, "\\|"))
 
     singlefile <- NULL
-    for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
+    for (singlefile_galaxyPath_i in seq_along(length(singlefile_galaxyPaths))) {
       singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
       singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
       # In case, an url is used to import data within Galaxy
-      singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
+      singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName, "/")), n = 1)
       singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
     }
   }
-  return(list(zipfile=zipfile, singlefile=singlefile))
+  return(list(zipfile = zipfile, singlefile = singlefile))
 }
 
 # This function retrieve the raw file in the working directory
 #   - if zipfile: unzip the file with its directory tree
 #   - if singlefiles: set symlink with the good filename
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
-retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
-    if(!is.null(singlefile) && (length("singlefile")>0)) {
-        for (singlefile_sampleName in names(singlefile)) {
-            singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
-            if(!file.exists(singlefile_galaxyPath)){
-                error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
-                print(error_message); stop(error_message)
-            }
+retrieveRawfileInWD <- function(singlefile, zipfile) {
+  if (!is.null(singlefile) && (length("singlefile") > 0)) {
+    for (singlefile_sampleName in names(singlefile)) {
+      singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
+      if (!file.exists(singlefile_galaxyPath)) {
+        error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!")
+        print(error_message)
+        stop(error_message)
+      }
 
-            if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
-                file.copy(singlefile_galaxyPath, singlefile_sampleName)
-
-        }
-        directory <- "."
+      if (!suppressWarnings(try(file.link(singlefile_galaxyPath, singlefile_sampleName), silent = TRUE)))
+        file.copy(singlefile_galaxyPath, singlefile_sampleName)
 
     }
-    if(!is.null(zipfile) && (zipfile != "")) {
-        if(!file.exists(zipfile)){
-            error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
-            print(error_message)
-            stop(error_message)
-        }
+    directory <- "."
 
-        #list all file in the zip file
-        #zip_files <- unzip(zipfile,list=T)[,"Name"]
+  }
+  if (!is.null(zipfile) && (zipfile != "")) {
+    if (!file.exists(zipfile)) {
+      error_message <- paste(
+        "Cannot access the Zip file:",
+        zipfile,
+        ". Please, contact your administrator ... if you have one!"
+      )
+      print(error_message)
+      stop(error_message)
+    }
 
-        #unzip
-        suppressWarnings(unzip(zipfile, unzip="unzip"))
+    #unzip
+    suppressWarnings(unzip(zipfile, unzip = "unzip"))
 
-        #get the directory name
-        suppressWarnings(filesInZip <- unzip(zipfile, list=T))
-        directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])))
-        directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
-        directory <- "."
-        if (length(directories) == 1) directory <- directories
+    #get the directory name
+    suppressWarnings(filesInZip <- unzip(zipfile, list = TRUE))
+    directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
+    directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
+    directory <- "."
+    if (length(directories) == 1) directory <- directories
 
-        cat("files_root_directory\t",directory,"\n")
+    cat("files_root_directory\t", directory, "\n")
 
-    }
-    return (directory)
+  }
+  return(directory)
 }
 
 
 # This function retrieve a xset like object
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
 getxcmsSetObject <- function(xobject) {
-    # XCMS 1.x
-    if (class(xobject) == "xcmsSet")
-        return (xobject)
-    # XCMS 3.x
-    if (class(xobject) == "XCMSnExp") {
-        # Get the legacy xcmsSet object
-        suppressWarnings(xset <- as(xobject, 'xcmsSet'))
-        if (!is.null(xset@phenoData$sample_group))
-            sampclass(xset) <- xset@phenoData$sample_group
-        else
-            sampclass(xset) <- "."
-        return (xset)
-    }
+  # XCMS 1.x
+  if (class(xobject) == "xcmsSet")
+    return(xobject)
+  # XCMS 3.x
+  if (class(xobject) == "XCMSnExp") {
+    # Get the legacy xcmsSet object
+    suppressWarnings(xset <- as(xobject, "xcmsSet"))
+    if (!is.null(xset@phenoData$sample_group))
+      sampclass(xset) <- xset@phenoData$sample_group
+    else
+      sampclass(xset) <- "."
+    return(xset)
+  }
 }