Mercurial > repos > computational-metabolomics > mspurity_createdatabase
diff purityX.R @ 6:2f71b3495221 draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author | computational-metabolomics |
---|---|
date | Thu, 04 Mar 2021 12:27:21 +0000 |
parents | f52287a06c02 |
children | efd14b326007 |
line wrap: on
line diff
--- a/purityX.R Fri Nov 13 10:05:38 2020 +0000 +++ b/purityX.R Thu Mar 04 12:27:21 2021 +0000 @@ -3,67 +3,66 @@ print(sessionInfo()) option_list <- list( - make_option(c("--xset_path"), type="character"), - make_option(c("-o", "--out_dir"), type="character"), - make_option(c("--mzML_path"), type="character"), - make_option("--minOffset", default=0.5), - make_option("--maxOffset", default=0.5), - make_option("--ilim", default=0.05), - make_option("--iwNorm", default="none", type="character"), - make_option("--exclude_isotopes", action="store_true"), - make_option("--isotope_matrix", type="character"), - make_option("--purityType", default="purityFWHMmedian"), - make_option("--singleFile", default=0), - make_option("--cores", default=4), - make_option("--xgroups", type="character"), - make_option("--rdata_name", default='xset'), - make_option("--camera_xcms", default='xset'), - make_option("--files", type="character"), - make_option("--galaxy_files", type="character"), - make_option("--choose_class", type="character"), - make_option("--ignore_files", type="character"), - make_option("--rtraw_columns", action="store_true") + make_option(c("--xset_path"), type = "character"), + make_option(c("-o", "--out_dir"), type = "character"), + make_option(c("--mzML_path"), type = "character"), + make_option("--minOffset", default = 0.5), + make_option("--maxOffset", default = 0.5), + make_option("--ilim", default = 0.05), + make_option("--iwNorm", default = "none", type = "character"), + make_option("--exclude_isotopes", action = "store_true"), + make_option("--isotope_matrix", type = "character"), + make_option("--purityType", default = "purityFWHMmedian"), + make_option("--singleFile", default = 0), + make_option("--cores", default = 4), + make_option("--xgroups", type = "character"), + make_option("--rdata_name", default = "xset"), + make_option("--camera_xcms", default = "xset"), + make_option("--files", type = "character"), + make_option("--galaxy_files", type = "character"), + make_option("--choose_class", type = "character"), + make_option("--ignore_files", type = "character"), + make_option("--rtraw_columns", action = "store_true") ) -opt<- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) -if (!is.null(opt$xgroups)){ - xgroups = as.numeric(strsplit(opt$xgroups, ',')[[1]]) +if (!is.null(opt$xgroups)) { + xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]]) }else{ - xgroups = NULL + xgroups <- NULL } - print(xgroups) -if (!is.null(opt$remove_nas)){ - df <- df[!is.na(df$mz),] +if (!is.null(opt$remove_nas)) { + df <- df[!is.na(df$mz), ] } -if (is.null(opt$isotope_matrix)){ +if (is.null(opt$isotope_matrix)) { im <- NULL }else{ im <- read.table(opt$isotope_matrix, - header = TRUE, sep='\t', stringsAsFactors = FALSE) + header = TRUE, sep = "\t", stringsAsFactors = FALSE) } -if (is.null(opt$exclude_isotopes)){ +if (is.null(opt$exclude_isotopes)) { isotopes <- FALSE }else{ isotopes <- TRUE } -if (is.null(opt$rtraw_columns)){ +if (is.null(opt$rtraw_columns)) { rtraw_columns <- FALSE }else{ rtraw_columns <- TRUE } -loadRData <- function(rdata_path, xset_name){ +loadRData <- function(rdata_path, xset_name) { #loads an RData file, and returns the named xset object if it is there load(rdata_path) return(get(ls()[ls() == xset_name])) @@ -71,7 +70,7 @@ target_obj <- loadRData(opt$xset_path, opt$rdata_name) -if (opt$camera_xcms=='camera'){ +if (opt$camera_xcms == "camera") { xset <- target_obj@xcmsSet }else{ xset <- target_obj @@ -79,36 +78,35 @@ print(xset) -minOffset = as.numeric(opt$minOffset) -maxOffset = as.numeric(opt$maxOffset) - +minOffset <- as.numeric(opt$minOffset) +maxOffset <- as.numeric(opt$maxOffset) -if (opt$iwNorm=='none'){ - iwNorm = FALSE - iwNormFun = NULL -}else if (opt$iwNorm=='gauss'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormGauss(minOff=-minOffset, maxOff=maxOffset) -}else if (opt$iwNorm=='rcosine'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormRcosine(minOff=-minOffset, maxOff=maxOffset) -}else if (opt$iwNorm=='QE5'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormQE.5() +if (opt$iwNorm == "none") { + iwNorm <- FALSE + iwNormFun <- NULL +}else if (opt$iwNorm == "gauss") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) +}else if (opt$iwNorm == "rcosine") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) +}else if (opt$iwNorm == "QE5") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormQE.5() } print(xset@filepaths) -if (!is.null(opt$files)){ - updated_filepaths <- trimws(strsplit(opt$files, ',')[[1]]) +if (!is.null(opt$files)) { + updated_filepaths <- trimws(strsplit(opt$files, ",")[[1]]) updated_filepaths <- updated_filepaths[updated_filepaths != ""] print(updated_filepaths) - updated_filenames = basename(updated_filepaths) - original_filenames = basename(xset@filepaths) - update_idx = match(updated_filenames, original_filenames) + updated_filenames <- basename(updated_filepaths) + original_filenames <- basename(xset@filepaths) + update_idx <- match(updated_filenames, original_filenames) - if (!is.null(opt$galaxy_files)){ - galaxy_files <- trimws(strsplit(opt$galaxy_files, ',')[[1]]) + if (!is.null(opt$galaxy_files)) { + galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]]) galaxy_files <- galaxy_files[galaxy_files != ""] xset@filepaths <- galaxy_files[update_idx] }else{ @@ -116,27 +114,26 @@ } } -if (!is.null(opt$choose_class)){ - classes <- trimws(strsplit(opt$choose_class, ',')[[1]]) - +if (!is.null(opt$choose_class)) { + classes <- trimws(strsplit(opt$choose_class, ",")[[1]]) ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes) - print('choose class') + print("choose class") print(ignore_files_class) }else{ ignore_files_class <- NA } -if (!is.null(opt$ignore_files)){ - ignore_files_string <- trimws(strsplit(opt$ignore_files, ',')[[1]]) +if (!is.null(opt$ignore_files)) { + ignore_files_string <- trimws(strsplit(opt$ignore_files, ",")[[1]]) filenames <- rownames(xset@phenoData) ignore_files <- which(filenames %in% ignore_files_string) ignore_files <- unique(c(ignore_files, ignore_files_class)) ignore_files <- ignore_files[ignore_files != ""] }else{ - if (anyNA(ignore_files_class)){ + if (anyNA(ignore_files_class)) { ignore_files <- NULL }else{ ignore_files <- ignore_files_class @@ -144,41 +141,40 @@ } -print('ignore_files') +print("ignore_files") print(ignore_files) -ppLCMS <- msPurity::purityX(xset=xset, - offsets=c(minOffset, maxOffset), - cores=opt$cores, - xgroups=xgroups, - purityType=opt$purityType, - ilim = opt$ilim, - isotopes = isotopes, - im = im, - iwNorm = iwNorm, - iwNormFun = iwNormFun, - singleFile = opt$singleFile, - fileignore = ignore_files, - rtrawColumns=rtraw_columns) - +ppLCMS <- msPurity::purityX(xset = xset, + offsets = c(minOffset, maxOffset), + cores = opt$cores, + xgroups = xgroups, + purityType = opt$purityType, + ilim = opt$ilim, + isotopes = isotopes, + im = im, + iwNorm = iwNorm, + iwNormFun = iwNormFun, + singleFile = opt$singleFile, + fileignore = ignore_files, + rtrawColumns = rtraw_columns) dfp <- ppLCMS@predictions # to make compatable with deconrank -colnames(dfp)[colnames(dfp)=='grpid'] = 'peakID' -colnames(dfp)[colnames(dfp)=='median'] = 'medianPurity' -colnames(dfp)[colnames(dfp)=='mean'] = 'meanPurity' -colnames(dfp)[colnames(dfp)=='sd'] = 'sdPurity' -colnames(dfp)[colnames(dfp)=='stde'] = 'sdePurity' -colnames(dfp)[colnames(dfp)=='RSD'] = 'cvPurity' -colnames(dfp)[colnames(dfp)=='pknm'] = 'pknmPurity' -if(sum(is.na(dfp$medianPurity))>0){ - dfp[is.na(dfp$medianPurity),]$medianPurity = 0 +colnames(dfp)[colnames(dfp) == "grpid"] <- "peakID" +colnames(dfp)[colnames(dfp) == "median"] <- "medianPurity" +colnames(dfp)[colnames(dfp) == "mean"] <- "meanPurity" +colnames(dfp)[colnames(dfp) == "sd"] <- "sdPurity" +colnames(dfp)[colnames(dfp) == "stde"] <- "sdePurity" +colnames(dfp)[colnames(dfp) == "RSD"] <- "cvPurity" +colnames(dfp)[colnames(dfp) == "pknm"] <- "pknmPurity" + +if (sum(is.na(dfp$medianPurity)) > 0) { + dfp[is.na(dfp$medianPurity), ]$medianPurity <- 0 } +print(head(dfp)) +write.table(dfp, file.path(opt$out_dir, "purityX_output.tsv"), row.names = FALSE, sep = "\t") -print(head(dfp)) -write.table(dfp, file.path(opt$out_dir, 'purityX_output.tsv'), row.names=FALSE, sep='\t') - -save.image(file.path(opt$out_dir, 'purityX_output.RData')) +save.image(file.path(opt$out_dir, "purityX_output.RData"))