Mercurial > repos > computational-metabolomics > mspurity_createdatabase
diff purityX.R @ 0:f52287a06c02 draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc-dirty"
author | computational-metabolomics |
---|---|
date | Wed, 27 Nov 2019 13:44:58 -0500 |
parents | |
children | 2f71b3495221 |
line wrap: on
line diff
# purityX.R
#
# Command-line wrapper around msPurity::purityX().
#
# Steps:
#   1. parse the command-line options;
#   2. load an xcmsSet (or a CAMERA object wrapping one) from an RData file;
#   3. optionally re-point the xcmsSet file paths at the supplied files;
#   4. work out which files should be ignored;
#   5. run msPurity::purityX();
#   6. write `purityX_output.tsv` and `purityX_output.RData` to --out_dir.

library(msPurity)
library(optparse)
print(sessionInfo())

option_list <- list(
  make_option(c("--xset_path"), type = "character"),
  make_option(c("-o", "--out_dir"), type = "character"),
  make_option(c("--mzML_path"), type = "character"),
  make_option("--minOffset", default = 0.5),
  make_option("--maxOffset", default = 0.5),
  make_option("--ilim", default = 0.05),
  make_option("--iwNorm", default = "none", type = "character"),
  make_option("--exclude_isotopes", action = "store_true"),
  make_option("--isotope_matrix", type = "character"),
  make_option("--purityType", default = "purityFWHMmedian"),
  make_option("--singleFile", default = 0),
  make_option("--cores", default = 4),
  make_option("--xgroups", type = "character"),
  make_option("--rdata_name", default = "xset"),
  make_option("--camera_xcms", default = "xset"),
  make_option("--files", type = "character"),
  make_option("--galaxy_files", type = "character"),
  make_option("--choose_class", type = "character"),
  make_option("--ignore_files", type = "character"),
  make_option("--rtraw_columns", action = "store_true")
)

opt <- parse_args(OptionParser(option_list = option_list))
print(opt)

# Split a comma separated command-line value into trimmed, non-empty items.
split_csv <- function(x) {
  parts <- trimws(strsplit(x, ",")[[1]])
  parts[parts != ""]
}

# Optional subset of xcms groups, given as a comma separated list of numbers.
if (!is.null(opt$xgroups)) {
  xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]])
} else {
  xgroups <- NULL
}
print(xgroups)

# NOTE: an earlier revision filtered an undefined `df` when an undeclared
# `--remove_nas` option was set. That branch could never run and was removed.

# Optional isotope matrix (tab separated, with header).
if (is.null(opt$isotope_matrix)) {
  im <- NULL
} else {
  im <- read.table(opt$isotope_matrix,
                   header = TRUE, sep = "\t", stringsAsFactors = FALSE)
}

# Flags: isTRUE() gives FALSE whether an absent flag is reported as NULL or
# as FALSE by the option parser.
isotopes <- isTRUE(opt$exclude_isotopes)
rtraw_columns <- isTRUE(opt$rtraw_columns)

# Load an RData file and return the object called `xset_name` stored in it.
#
# The file is loaded into a private environment so that objects stored in it
# cannot overwrite this function's own arguments.
#
# rdata_path: path to the RData file.
# xset_name:  name of the object to extract.
# Stops with an explicit message when the object is not in the file.
loadRData <- function(rdata_path, xset_name) {
  loaded <- new.env()
  load(rdata_path, envir = loaded)
  if (!exists(xset_name, envir = loaded, inherits = FALSE)) {
    stop("Object '", xset_name, "' not found in ", rdata_path, call. = FALSE)
  }
  get(xset_name, envir = loaded, inherits = FALSE)
}

target_obj <- loadRData(opt$xset_path, opt$rdata_name)

# With --camera_xcms=camera the loaded object carries the xcmsSet in its
# `xcmsSet` slot; otherwise the loaded object is used as the xcmsSet directly.
if (opt$camera_xcms == "camera") {
  xset <- target_obj@xcmsSet
} else {
  xset <- target_obj
}

print(xset)

minOffset <- as.numeric(opt$minOffset)
maxOffset <- as.numeric(opt$maxOffset)

# Isolation window normalisation function.
if (opt$iwNorm == "none") {
  iwNorm <- FALSE
  iwNormFun <- NULL
} else if (opt$iwNorm == "gauss") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
} else if (opt$iwNorm == "rcosine") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
} else if (opt$iwNorm == "QE5") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormQE.5()
} else {
  # Fail here rather than later with "object 'iwNorm' not found".
  stop("Unknown --iwNorm value '", opt$iwNorm,
       "' (expected one of: none, gauss, rcosine, QE5)", call. = FALSE)
}

print(xset@filepaths)

# Re-point the xcmsSet at the supplied files, matching on base file name.
if (!is.null(opt$files)) {
  updated_filepaths <- split_csv(opt$files)
  print(updated_filepaths)
  updated_filenames <- basename(updated_filepaths)
  original_filenames <- basename(xset@filepaths)

  # For every file of the xcmsSet, the position of the same-named file in the
  # supplied list. Matching in this direction keeps the xcmsSet order and
  # length even when the supplied list is ordered differently.
  update_idx <- match(original_filenames, updated_filenames)

  if (!is.null(opt$galaxy_files)) {
    # --galaxy_files is indexed in parallel with --files.
    galaxy_files <- split_csv(opt$galaxy_files)
    new_filepaths <- galaxy_files[update_idx]
  } else {
    new_filepaths <- updated_filepaths[update_idx]
  }

  unmatched <- is.na(new_filepaths)
  if (any(unmatched)) {
    warning("No replacement path supplied for: ",
            paste(original_filenames[unmatched], collapse = ", "),
            call. = FALSE)
  }
  # Files without a replacement keep their original path.
  xset@filepaths[!unmatched] <- new_filepaths[!unmatched]
}

# Files to ignore because their class is not one of the chosen classes.
if (!is.null(opt$choose_class)) {
  classes <- split_csv(opt$choose_class)
  ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes)

  print("choose class")
  print(ignore_files_class)
} else {
  ignore_files_class <- integer(0)
}

# Files to ignore because they were named explicitly.
if (!is.null(opt$ignore_files)) {
  ignore_files_string <- split_csv(opt$ignore_files)
  filenames <- rownames(xset@phenoData)
  ignore_files <- which(filenames %in% ignore_files_string)
} else {
  ignore_files <- integer(0)
}

# Combine both sources. An empty result is passed on as NULL, so no NA or
# zero-length index reaches purityX().
ignore_files <- unique(c(ignore_files, ignore_files_class))
if (length(ignore_files) == 0) {
  ignore_files <- NULL
}

print("ignore_files")
print(ignore_files)

ppLCMS <- msPurity::purityX(xset = xset,
                            offsets = c(minOffset, maxOffset),
                            cores = opt$cores,
                            xgroups = xgroups,
                            purityType = opt$purityType,
                            ilim = opt$ilim,
                            isotopes = isotopes,
                            im = im,
                            iwNorm = iwNorm,
                            iwNormFun = iwNormFun,
                            singleFile = opt$singleFile,
                            fileignore = ignore_files,
                            rtrawColumns = rtraw_columns)

dfp <- ppLCMS@predictions

# Rename columns to make the output compatible with deconrank.
rename_map <- c(grpid = "peakID",
                median = "medianPurity",
                mean = "meanPurity",
                sd = "sdPurity",
                stde = "sdePurity",
                RSD = "cvPurity",
                pknm = "pknmPurity")
to_rename <- colnames(dfp) %in% names(rename_map)
colnames(dfp)[to_rename] <- unname(rename_map[colnames(dfp)[to_rename]])

# Missing median purities are reported as 0.
na_median <- is.na(dfp$medianPurity)
if (any(na_median)) {
  dfp$medianPurity[na_median] <- 0
}

print(head(dfp))
write.table(dfp, file.path(opt$out_dir, "purityX_output.tsv"),
            row.names = FALSE, sep = "\t")

save.image(file.path(opt$out_dir, "purityX_output.RData"))