view purityX.R @ 6:d4a17be5429a draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author computational-metabolomics
date Thu, 04 Mar 2021 12:34:06 +0000
parents ab65999a5430
children
line wrap: on
line source

library(msPurity)
library(optparse)
print(sessionInfo())

option_list <- list(
  make_option(c("--xset_path"), type = "character"),
  make_option(c("-o", "--out_dir"), type = "character"),
  make_option(c("--mzML_path"), type = "character"),
  make_option("--minOffset", default = 0.5),
  make_option("--maxOffset", default = 0.5),
  make_option("--ilim", default = 0.05),
  make_option("--iwNorm", default = "none", type = "character"),
  make_option("--exclude_isotopes", action = "store_true"),
  make_option("--isotope_matrix", type = "character"),
  make_option("--purityType", default = "purityFWHMmedian"),
  make_option("--singleFile", default = 0),
  make_option("--cores", default = 4),
  make_option("--xgroups", type = "character"),
  make_option("--rdata_name", default = "xset"),
  make_option("--camera_xcms", default = "xset"),
  make_option("--files", type = "character"),
  make_option("--galaxy_files", type = "character"),
  make_option("--choose_class", type = "character"),
  make_option("--ignore_files", type = "character"),
  make_option("--rtraw_columns",  action = "store_true")
)


opt <- parse_args(OptionParser(option_list = option_list))
print(opt)


if (!is.null(opt$xgroups)) {
    xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]])
}else{
    xgroups <- NULL
}


print(xgroups)

if (!is.null(opt$remove_nas)) {
  df <- df[!is.na(df$mz), ]
}

if (is.null(opt$isotope_matrix)) {
    im <- NULL
}else{
    im <- read.table(opt$isotope_matrix,
                     header = TRUE, sep = "\t", stringsAsFactors = FALSE)
}

if (is.null(opt$exclude_isotopes)) {
    isotopes <- FALSE
}else{
    isotopes <- TRUE
}

if (is.null(opt$rtraw_columns)) {
    rtraw_columns <- FALSE
}else{
    rtraw_columns <- TRUE
}

loadRData <- function(rdata_path, xset_name) {
#loads an RData file, and returns the named xset object if it is there
    load(rdata_path)
    return(get(ls()[ls() == xset_name]))
}

target_obj <- loadRData(opt$xset_path, opt$rdata_name)

if (opt$camera_xcms == "camera") {
    xset <- target_obj@xcmsSet
}else{
    xset <- target_obj
}

print(xset)

minOffset <- as.numeric(opt$minOffset)
maxOffset <- as.numeric(opt$maxOffset)

if (opt$iwNorm == "none") {
    iwNorm <- FALSE
    iwNormFun <- NULL
}else if (opt$iwNorm == "gauss") {
    iwNorm <- TRUE
    iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
}else if (opt$iwNorm == "rcosine") {
    iwNorm <- TRUE
    iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
}else if (opt$iwNorm == "QE5") {
    iwNorm <- TRUE
    iwNormFun <- msPurity::iwNormQE.5()
}

print(xset@filepaths)

if (!is.null(opt$files)) {
  updated_filepaths <- trimws(strsplit(opt$files, ",")[[1]])
  updated_filepaths <- updated_filepaths[updated_filepaths != ""]
  print(updated_filepaths)
  updated_filenames <- basename(updated_filepaths)
  original_filenames <- basename(xset@filepaths)
  update_idx <- match(updated_filenames, original_filenames)

    if (!is.null(opt$galaxy_files)) {
        galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]])
        galaxy_files <- galaxy_files[galaxy_files != ""]
        xset@filepaths <- galaxy_files[update_idx]
    }else{
        xset@filepaths <- updated_filepaths[update_idx]
    }
}

if (!is.null(opt$choose_class)) {
  classes <- trimws(strsplit(opt$choose_class, ",")[[1]])

  ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes)

  print("choose class")
  print(ignore_files_class)
}else{
  ignore_files_class <- NA
}

if (!is.null(opt$ignore_files)) {
  ignore_files_string <- trimws(strsplit(opt$ignore_files, ",")[[1]])
  filenames <- rownames(xset@phenoData)
  ignore_files <- which(filenames %in% ignore_files_string)

  ignore_files <- unique(c(ignore_files, ignore_files_class))
  ignore_files <- ignore_files[ignore_files != ""]
}else{
  if (anyNA(ignore_files_class)) {
    ignore_files <- NULL
  }else{
    ignore_files <- ignore_files_class
  }

}

print("ignore_files")
print(ignore_files)


ppLCMS <- msPurity::purityX(xset = xset,
                            offsets = c(minOffset, maxOffset),
                            cores = opt$cores,
                            xgroups = xgroups,
                            purityType = opt$purityType,
                            ilim = opt$ilim,
                            isotopes = isotopes,
                            im = im,
                            iwNorm = iwNorm,
                            iwNormFun = iwNormFun,
                            singleFile = opt$singleFile,
                            fileignore = ignore_files,
                            rtrawColumns = rtraw_columns)

dfp <- ppLCMS@predictions

# to make compatable with deconrank
colnames(dfp)[colnames(dfp) == "grpid"] <- "peakID"
colnames(dfp)[colnames(dfp) == "median"] <- "medianPurity"
colnames(dfp)[colnames(dfp) == "mean"] <- "meanPurity"
colnames(dfp)[colnames(dfp) == "sd"] <- "sdPurity"
colnames(dfp)[colnames(dfp) == "stde"] <- "sdePurity"
colnames(dfp)[colnames(dfp) == "RSD"] <- "cvPurity"
colnames(dfp)[colnames(dfp) == "pknm"] <- "pknmPurity"

if (sum(is.na(dfp$medianPurity)) > 0) {
  dfp[is.na(dfp$medianPurity), ]$medianPurity <- 0
}

print(head(dfp))
write.table(dfp, file.path(opt$out_dir, "purityX_output.tsv"), row.names = FALSE, sep = "\t")

save.image(file.path(opt$out_dir, "purityX_output.RData"))