Mercurial > repos > computational-metabolomics > mspurity_createdatabase
diff dimsPredictPuritySingle.R @ 6:2f71b3495221 draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author | computational-metabolomics |
---|---|
date | Thu, 04 Mar 2021 12:27:21 +0000 |
parents | f52287a06c02 |
children | efd14b326007 |
line wrap: on
line diff
--- a/dimsPredictPuritySingle.R Fri Nov 13 10:05:38 2020 +0000 +++ b/dimsPredictPuritySingle.R Thu Mar 04 12:27:21 2021 +0000 @@ -3,27 +3,27 @@ print(sessionInfo()) option_list <- list( - make_option(c("--mzML_file"), type="character"), - make_option(c("--mzML_files"), type="character"), - make_option(c("--mzML_filename"), type="character", default=''), - make_option(c("--mzML_galaxy_names"), type="character", default=''), - make_option(c("--peaks_file"), type="character"), - make_option(c("-o", "--out_dir"), type="character"), - make_option("--minoffset", default=0.5), - make_option("--maxoffset", default=0.5), - make_option("--ilim", default=0.05), - make_option("--ppm", default=4), - make_option("--dimspy", action="store_true"), - make_option("--sim", action="store_true"), - make_option("--remove_nas", action="store_true"), - make_option("--iwNorm", default="none", type="character"), - make_option("--file_num_dimspy", default=1), - make_option("--exclude_isotopes", action="store_true"), - make_option("--isotope_matrix", type="character") + make_option(c("--mzML_file"), type = "character"), + make_option(c("--mzML_files"), type = "character"), + make_option(c("--mzML_filename"), type = "character", default = ""), + make_option(c("--mzML_galaxy_names"), type = "character", default = ""), + make_option(c("--peaks_file"), type = "character"), + make_option(c("-o", "--out_dir"), type = "character"), + make_option("--minoffset", default = 0.5), + make_option("--maxoffset", default = 0.5), + make_option("--ilim", default = 0.05), + make_option("--ppm", default = 4), + make_option("--dimspy", action = "store_true"), + make_option("--sim", action = "store_true"), + make_option("--remove_nas", action = "store_true"), + make_option("--iwNorm", default = "none", type = "character"), + make_option("--file_num_dimspy", default = 1), + make_option("--exclude_isotopes", action = "store_true"), + make_option("--isotope_matrix", type = "character") ) # store options -opt<- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(sessionInfo()) print(opt) @@ -31,119 +31,118 @@ print(opt$mzML_files) print(opt$mzML_galaxy_names) -str_to_vec <- function(x){ +str_to_vec <- function(x) { print(x) - x <- trimws(strsplit(x, ',')[[1]]) + x <- trimws(strsplit(x, ",")[[1]]) return(x[x != ""]) } -find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename){ +find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename) { mzML_filename <- trimws(mzML_filename) mzML_files <- str_to_vec(mzML_files) galaxy_names <- str_to_vec(galaxy_names) - if (mzML_filename %in% galaxy_names){ - return(mzML_files[galaxy_names==mzML_filename]) + if (mzML_filename %in% galaxy_names) { + return(mzML_files[galaxy_names == mzML_filename]) }else{ stop(paste("mzML file not found - ", mzML_filename)) } } -if (is.null(opt$dimspy)){ - df <- read.table(opt$peaks_file, header = TRUE, sep='\t') - if (file.exists(opt$mzML_file)){ +if (is.null(opt$dimspy)) { + df <- read.table(opt$peaks_file, header = TRUE, sep = "\t") + if (file.exists(opt$mzML_file)) { mzML_file <- opt$mzML_file - }else if (!is.null(opt$mzML_files)){ - mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, + }else if (!is.null(opt$mzML_files)) { + mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, opt$mzML_filename) }else{ - mzML_file <- file.path(opt$mzML_file, filename) - } + mzML_file <- file.path(opt$mzML_file, filename) + } }else{ indf <- read.table(opt$peaks_file, - header = TRUE, sep='\t', stringsAsFactors = FALSE) - + header = TRUE, sep = "\t", stringsAsFactors = FALSE) + filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] print(filename) # check if the data file is mzML or RAW (can only use mzML currently) so # we expect an mzML file of the same name in the same folder - indf$i <- indf[,colnames(indf)==filename] - indf[,colnames(indf)==filename] <- as.numeric(indf[,colnames(indf)==filename]) - - filename = sub("raw", "mzML", filename, ignore.case = TRUE) + indf$i <- indf[, colnames(indf) == filename] + indf[, colnames(indf) == filename] <- as.numeric(indf[, colnames(indf) == filename]) + + filename <- sub("raw", "mzML", filename, ignore.case = TRUE) print(filename) - - - if (file.exists(opt$mzML_file)){ + + if (file.exists(opt$mzML_file)) { mzML_file <- opt$mzML_file - }else if (!is.null(opt$mzML_files)){ + }else if (!is.null(opt$mzML_files)) { mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename) }else{ - mzML_file <- file.path(opt$mzML_file, filename) - } - - # Update the dimspy output with the correct information - df <- indf[4:nrow(indf),] - if ('blank_flag' %in% colnames(df)){ - df <- df[df$blank_flag==1,] + mzML_file <- file.path(opt$mzML_file, filename) } - colnames(df)[colnames(df)=='m.z'] <- 'mz' - - if ('nan' %in% df$mz){ - df[df$mz=='nan',]$mz <- NA + + # Update the dimspy output with the correct information + df <- indf[4:nrow(indf), ] + if ("blank_flag" %in% colnames(df)) { + df <- df[df$blank_flag == 1, ] + } + colnames(df)[colnames(df) == "m.z"] <- "mz" + + if ("nan" %in% df$mz) { + df[df$mz == "nan", ]$mz <- NA } df$mz <- as.numeric(df$mz) } -if (!is.null(opt$remove_nas)){ - df <- df[!is.na(df$mz),] +if (!is.null(opt$remove_nas)) { + df <- df[!is.na(df$mz), ] } -if (is.null(opt$isotope_matrix)){ +if (is.null(opt$isotope_matrix)) { im <- NULL }else{ im <- read.table(opt$isotope_matrix, - header = TRUE, sep='\t', stringsAsFactors = FALSE) + header = TRUE, sep = "\t", stringsAsFactors = FALSE) } -if (is.null(opt$exclude_isotopes)){ +if (is.null(opt$exclude_isotopes)) { isotopes <- FALSE }else{ isotopes <- TRUE } -if (is.null(opt$sim)){ - sim=FALSE +if (is.null(opt$sim)) { + sim <- FALSE }else{ - sim=TRUE + sim <- TRUE } -minOffset = as.numeric(opt$minoffset) -maxOffset = as.numeric(opt$maxoffset) +minOffset <- as.numeric(opt$minoffset) +maxOffset <- as.numeric(opt$maxoffset) -if (opt$iwNorm=='none'){ - iwNorm = FALSE - iwNormFun = NULL -}else if (opt$iwNorm=='gauss'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormGauss(minOff=-minOffset, maxOff=maxOffset) -}else if (opt$iwNorm=='rcosine'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormRcosine(minOff=-minOffset, maxOff=maxOffset) -}else if (opt$iwNorm=='QE5'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormQE.5() +if (opt$iwNorm == "none") { + iwNorm <- FALSE + iwNormFun <- NULL +}else if (opt$iwNorm == "gauss") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) +}else if (opt$iwNorm == "rcosine") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) +}else if (opt$iwNorm == "QE5") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormQE.5() } -print('FIRST ROWS OF PEAK FILE') +print("FIRST ROWS OF PEAK FILE") print(head(df)) print(mzML_file) predicted <- msPurity::dimsPredictPuritySingle(df$mz, - filepth=mzML_file, - minOffset=minOffset, - maxOffset=maxOffset, - ppm=opt$ppm, - mzML=TRUE, + filepth = mzML_file, + minOffset = minOffset, + maxOffset = maxOffset, + ppm = opt$ppm, + mzML = TRUE, sim = sim, ilim = opt$ilim, isotopes = isotopes, @@ -154,10 +153,8 @@ predicted <- cbind(df, predicted) print(head(predicted)) -print(file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv')) +print(file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv")) -write.table(predicted, - file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv'), - row.names=FALSE, sep='\t') - - +write.table(predicted, + file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"), + row.names = FALSE, sep = "\t")