view lib.r @ 6:2443150161b4 draft default tip

planemo upload commit cfad09eb4dd6b1439b7de6a0852cd8fa22210f58
author workflow4metabolomics
date Mon, 11 Sep 2023 22:41:22 +0000
parents 4c4945bce185
children
line wrap: on
line source

# lib.r

# @author G. Le Corguille
# solve an issue with batch if arguments are logical TRUE/FALSE
parseCommandArgs <- function(...) {
  args <- batch::parseCommandArgs(...)
  for (key in names(args)) {
    if (args[key] %in% c("TRUE", "FALSE")) {
      args[key] <- as.logical(args[key])
    }
  }
  return(args)
}

# @author G. Le Corguille
# This function will
# - load the packages
# - display the sessionInfo
loadAndDisplayPackages <- function(pkgs) {
  for (pkg in pkgs) suppressPackageStartupMessages(stopifnot(library(pkg, quietly = TRUE, logical.return = TRUE, character.only = TRUE)))

  sessioninfo <- sessionInfo()
  cat(sessioninfo$R.version$version.string, "\n")
  cat("Main packages:\n")
  for (pkg in names(sessioninfo$otherPkgs)) {
    cat(paste(pkg, packageVersion(pkg)), "\t")
  }
  cat("\n")
  cat("Other loaded packages:\n")
  for (pkg in names(sessioninfo$loadedOnly)) {
    cat(paste(pkg, packageVersion(pkg)), "\t")
  }
  cat("\n")
}

# This function retrieve a xset like object
# @author Gildas Le Corguille lecorguille@sb-roscoff.fr
getxcmsSetObject <- function(xobject) {
  # XCMS 1.x
  if (class(xobject) == "xcmsSet") {
    return(xobject)
  }
  # XCMS 3.x
  if (class(xobject) == "XCMSnExp") {
    # Get the legacy xcmsSet object
    suppressWarnings(xset <- as(xobject, "xcmsSet"))
    if (is.null(xset@phenoData$sample_group)) {
      sampclass(xset) <- "."
    } else {
      sampclass(xset) <- xset@phenoData$sample_group
    }
    if (!is.null(xset@phenoData$sample_name)) {
      rownames(xset@phenoData) <- xset@phenoData$sample_name
    }
    return(xset)
  }
}

# @author G. Le Corguille
# The function create a pdf from the different png generated by diffreport
diffreport_png2pdf <- function(filebase) {
  dir.create("pdf")

  pdfEicOutput <- paste0("pdf/", filebase, "-eic_pdf.pdf")
  pdfBoxOutput <- paste0("pdf/", filebase, "-box_pdf.pdf")

  system(paste0("gm convert ", filebase, "_eic/*.png ", pdfEicOutput))
  system(paste0("gm convert ", filebase, "_box/*.png ", pdfBoxOutput))
}

# @author G. Le Corguille
# The function create a zip archive from the different png generated by diffreport
diffreport_png2zip <- function() {
  zip("eic.zip", dir(pattern = "_eic"), zip = Sys.which("zip"))
  zip("box.zip", dir(pattern = "_box"), zip = Sys.which("zip"))
}

# The function create a zip archive from the different tabular generated by diffreport
diffreport_tabular2zip <- function() {
  zip("tabular.zip", dir(pattern = "tabular/*"), zip = Sys.which("zip"))
}

# @author G. Le Corguille
# This function convert if it is required the Retention Time in minutes
RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
  if (convertRTMinute) {
    # converting the retention times (seconds) into minutes
    print("converting the retention times into minutes in the variableMetadata")
    variableMetadata[, "rt"] <- variableMetadata[, "rt"] / 60
    variableMetadata[, "rtmin"] <- variableMetadata[, "rtmin"] / 60
    variableMetadata[, "rtmax"] <- variableMetadata[, "rtmax"] / 60
  }
  return(variableMetadata)
}

# @author G. Le Corguille
# This function format ions identifiers
formatIonIdentifiers <- function(variableMetadata, numDigitsRT = 0, numDigitsMZ = 0) {
  splitDeco <- strsplit(as.character(variableMetadata$name), "_")
  idsDeco <- sapply(splitDeco, function(x) {
    deco <- unlist(x)[2]
    if (is.na(deco)) {
      return("")
    } else {
      return(paste0("_", deco))
    }
  })
  namecustom <- make.unique(paste0("M", round(variableMetadata[, "mz"], numDigitsMZ), "T", round(variableMetadata[, "rt"], numDigitsRT), idsDeco))
  variableMetadata <- cbind(name = variableMetadata$name, namecustom = namecustom, variableMetadata[, !(colnames(variableMetadata) %in% c("name"))])
  return(variableMetadata)
}

# The function annotateDiffreport without the corr function which bugs
annotatediff <- function(xset = xset, args = args, variableMetadataOutput = "variableMetadata.tsv") {
  # Resolve the bug with x11, with the function png
  options(bitmapType = "cairo")

  # Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
  res <- try(is.null(xset@filled))

  # ------ annot -------
  args$calcCiS <- as.logical(args$calcCiS)
  args$calcIso <- as.logical(args$calcIso)
  args$calcCaS <- as.logical(args$calcCaS)

  # common parameters
  args4annotate <- list(
    object = xset,
    nSlaves = args$nSlaves, sigma = args$sigma, perfwhm = args$perfwhm,
    maxcharge = args$maxcharge, maxiso = args$maxiso, minfrac = args$minfrac,
    ppm = args$ppm, mzabs = args$mzabs, quick = args$quick,
    polarity = args$polarity, max_peaks = args$max_peaks, intval = args$intval
  )

  if (args$quick == FALSE) {
    args4annotate <- append(
      args4annotate,
      list(
        graphMethod = args$graphMethod, cor_eic_th = args$cor_eic_th, pval = args$pval,
        calcCiS = args$calcCiS, calcIso = args$calcIso, calcCaS = args$calcCaS
      )
    )
    # no ruleset
    if (!is.null(args$multiplier)) {
      args4annotate <- append(
        args4annotate,
        list(multiplier = args$multiplier)
      )
    } else {  # ruleset
      rulset <- read.table(args$rules, h = TRUE, sep = ";")
      if (ncol(rulset) < 4) rulset <- read.table(args$rules, h = TRUE, sep = "\t")
      if (ncol(rulset) < 4) rulset <- read.table(args$rules, h = TRUE, sep = ",")
      if (ncol(rulset) < 4) {
        error_message <- "Your ruleset file seems not well formatted. The column separators accepted are ; , and tabulation"
        print(error_message)
        stop(error_message)
      }

      args4annotate <- append(
        args4annotate,
        list(rules = rulset)
      )
    }
  }


  # launch annotate
  xa <- do.call("annotate", args4annotate)
  peakList <- getPeaklist(xa, intval = args$intval)
  peakList <- cbind(groupnames(xa@xcmsSet), peakList)
  colnames(peakList)[1] <- c("name")

  # --- Multi condition : diffreport ---
  diffrepOri <- NULL
  if (!is.null(args$runDiffreport) && nlevels(sampclass(xset)) >= 2) {
    # Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
    res <- try(is.null(xset@filled))
    classes <- levels(sampclass(xset))
    for (i in seq_len(length(classes) - 1)) {
      for (n in seq_len(length(classes))) {
        if (i + n <= length(classes)) {
          filebase <- paste(classes[i], class2 = classes[i + n], sep = "-vs-")

          diffrep <- diffreport(
            object = xset, class1 = classes[i], class2 = classes[i + n],
            filebase = filebase, eicmax = args$eicmax, eicwidth = args$eicwidth,
            sortpval = TRUE, value = args$value, h = args$h, w = args$w, mzdec = args$mzdec, missing = 0
          )

          diffrepOri <- diffrep

          # renamming of the column rtmed to rt to fit with camera peaklist function output
          colnames(diffrep)[colnames(diffrep) == "rtmed"] <- "rt"
          colnames(diffrep)[colnames(diffrep) == "mzmed"] <- "mz"

          # combines results and reorder columns
          diffrep <- merge(peakList, diffrep[, c("name", "fold", "tstat", "pvalue")], by.x = "name", by.y = "name", sort = FALSE)
          diffrep <- cbind(diffrep[, !(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))], diffrep[, (colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))])

          diffrep <- RTSecondToMinute(diffrep, args$convertRTMinute)
          diffrep <- formatIonIdentifiers(diffrep, numDigitsRT = args$numDigitsRT, numDigitsMZ = args$numDigitsMZ)

          if (args$sortpval) {
            diffrep <- diffrep[order(diffrep$pvalue), ]
          }

          dir.create("tabular", showWarnings = FALSE)
          write.table(diffrep, sep = "\t", quote = FALSE, row.names = FALSE, file = paste("tabular/", filebase, "_tsv.tabular", sep = ""))

          if (args$eicmax != 0) {
            if (args$png2 == "pdf") {
              diffreport_png2pdf(filebase)
            }
            if (args$png2 == "zip") {
              diffreport_png2zip()
            }
          }
        }
      }
    }
    if (args$tabular2 == "zip") {
      diffreport_tabular2zip()
    }
  }

  # --- variableMetadata ---
  variableMetadata <- peakList[, !(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
  variableMetadata <- RTSecondToMinute(variableMetadata, args$convertRTMinute)
  variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = args$numDigitsRT, numDigitsMZ = args$numDigitsMZ)
  # if we have 2 conditions, we keep stat of diffrep
  if (!is.null(args$runDiffreport) && nlevels(sampclass(xset)) == 2) {
    variableMetadata <- merge(variableMetadata, diffrep[, c("name", "fold", "tstat", "pvalue")], by.x = "name", by.y = "name", sort = FALSE)
    if (exists("args[[\"sortpval\"]]")) {
      variableMetadata <- variableMetadata[order(variableMetadata$pvalue), ]
    }
  }

  variableMetadataOri <- variableMetadata
  write.table(variableMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = variableMetadataOutput)

  return(list("xa" = xa, "diffrep" = diffrepOri, "variableMetadata" = variableMetadataOri))
}


combinexsAnnos_function <- function(
    xaP, xaN, diffrepP = NULL, diffrepN = NULL,
    pos = TRUE, tol = 2, ruleset = NULL, keep_meta = TRUE, convertRTMinute = FALSE, numDigitsMZ = 0,
    numDigitsRT = 0, variableMetadataOutput = "variableMetadata.tsv") {
  # Load the two Rdata to extract the xset objects from positive and negative mode
  cat("\tObject xset from positive mode\n")
  print(xaP)
  cat("\n")

  cat("\tObject xset from negative mode\n")
  print(xaN)
  cat("\n")

  cat("\n")
  cat("\tCombining...\n")
  # Convert the string to numeric for creating matrix
  row <- as.numeric(strsplit(ruleset, ",")[[1]][1])
  column <- as.numeric(strsplit(ruleset, ",")[[1]][2])
  ruleset <- cbind(row, column)
  # Test if the file comes from an older version tool
  if ((!is.null(xaP)) && (!is.null(xaN))) {
    # Launch the combinexsannos function from CAMERA
    cAnnot <- combinexsAnnos(xaP, xaN, pos = pos, tol = tol, ruleset = ruleset)
  } else {
    stop("You must relauch the CAMERA.annotate step with the lastest version.")
  }

  if (pos) {
    xa <- xaP
    mode <- "neg. Mode"
  } else {
    xa <- xaN
    mode <- "pos. Mode"
  }

  peakList <- getPeaklist(xa)
  peakList <- cbind(groupnames(xa@xcmsSet), peakList)
  colnames(peakList)[1] <- c("name")
  variableMetadata <- cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup", mode)])
  variableMetadata <- variableMetadata[, !(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))]

  # Test if there are more than two classes (conditions)
  if (nlevels(sampclass(xaP@xcmsSet)) == 2 && (!is.null(diffrepN)) && (!is.null(diffrepP))) {
    diffrepP <- diffrepP[, c("name", "fold", "tstat", "pvalue")]
    colnames(diffrepP) <- paste("P.", colnames(diffrepP), sep = "")
    diffrepN <- diffrepN[, c("name", "fold", "tstat", "pvalue")]
    colnames(diffrepN) <- paste("N.", colnames(diffrepN), sep = "")

    variableMetadata <- merge(variableMetadata, diffrepP, by.x = "name", by.y = "P.name")
    variableMetadata <- merge(variableMetadata, diffrepN, by.x = "name", by.y = "N.name")
  }

  rownames(variableMetadata) <- NULL
  # TODO: checker colnames(variableMetadata)[1:2] = c("name", "mz/rt");

  variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
  variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = numDigitsRT, numDigitsMZ = numDigitsMZ)

  # If the user want to keep only the metabolites which match a difference
  if (keep_meta) {
    variableMetadata <- variableMetadata[variableMetadata[, c(mode)] != "", ]
  }

  # Write the output into a tsv file
  write.table(variableMetadata, sep = "\t", quote = FALSE, row.names = FALSE, file = variableMetadataOutput)
  return(variableMetadata)
}

# This function get the raw file path from the arguments
getRawfilePathFromArguments <- function(singlefile, zipfile, args) {
  if (!is.null(args$zipfile)) zipfile <- args$zipfile
  if (!is.null(args$zipfilePositive)) zipfile <- args$zipfilePositive
  if (!is.null(args$zipfileNegative)) zipfile <- args$zipfileNegative

  if (!is.null(args$singlefile_galaxyPath)) {
    singlefile_galaxyPaths <- args$singlefile_galaxyPath
    singlefile_sampleNames <- args$singlefile_sampleName
  }
  if (!is.null(args$singlefile_galaxyPathPositive)) {
    singlefile_galaxyPaths <- args$singlefile_galaxyPathPositive
    singlefile_sampleNames <- args$singlefile_sampleNamePositive
  }
  if (!is.null(args$singlefile_galaxyPathNegative)) {
    singlefile_galaxyPaths <- args$singlefile_galaxyPathNegative
    singlefile_sampleNames <- args$singlefile_sampleNameNegative
  }
  if (exists("singlefile_galaxyPaths")) {
    singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths, ","))
    singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames, ","))

    singlefile <- NULL
    for (singlefile_galaxyPath_i in seq_len(length(singlefile_galaxyPaths))) {
      singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
      singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
      singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
    }
  }
  for (argument in c(
    "zipfile", "zipfilePositive", "zipfileNegative",
    "singlefile_galaxyPath", "singlefile_sampleName",
    "singlefile_galaxyPathPositive", "singlefile_sampleNamePositive",
    "singlefile_galaxyPathNegative", "singlefile_sampleNameNegative"
  )) {
    args[[argument]] <- NULL
  }
  return(list(zipfile = zipfile, singlefile = singlefile, args = args))
}


# This function retrieve the raw file in the working directory
#   - if zipfile: unzip the file with its directory tree
#   - if singlefiles: set symlink with the good filename
retrieveRawfileInTheWorkingDir <- function(singlefile, zipfile) {
  if (!is.null(singlefile) && (length("singlefile") > 0)) {
    for (singlefile_sampleName in names(singlefile)) {
      singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
      if (!file.exists(singlefile_galaxyPath)) {
        error_message <- paste("Cannot access the sample:", singlefile_sampleName, "located:", singlefile_galaxyPath, ". Please, contact your administrator ... if you have one!")
        print(error_message)
        stop(error_message)
      }

      file.symlink(singlefile_galaxyPath, singlefile_sampleName)
    }
    directory <- "."
  }
  if (!is.null(zipfile) && (zipfile != "")) {
    if (!file.exists(zipfile)) {
      error_message <- paste("Cannot access the Zip file:", zipfile, ". Please, contact your administrator ... if you have one!")
      print(error_message)
      stop(error_message)
    }

    # unzip
    suppressWarnings(unzip(zipfile, unzip = "unzip"))

    # get the directory name
    filesInZip <- unzip(zipfile, list = TRUE)
    directories <- unique(unlist(lapply(strsplit(filesInZip$Name, "/"), function(x) x[1])))
    directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
    directory <- "."
    if (length(directories) == 1) directory <- directories

    cat("files_root_directory\t", directory, "\n")
  }
  return(directory)
}