Mercurial > repos > computational-metabolomics > mspurity_createdatabase
diff spectralMatching.R @ 6:2f71b3495221 draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author | computational-metabolomics |
---|---|
date | Thu, 04 Mar 2021 12:27:21 +0000 |
parents | 3d36a391612c |
children |
line wrap: on
line diff
--- a/spectralMatching.R Fri Nov 13 10:05:38 2020 +0000 +++ b/spectralMatching.R Thu Mar 04 12:27:21 2021 +0000 @@ -3,107 +3,111 @@ library(optparse) print(sessionInfo()) # load in library spectra config -source_local <- function(fname){ argv <- commandArgs(trailingOnly=FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } +source_local <- function(fname) { + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep = "/")) +} source_local("dbconfig.R") option_list <- list( - make_option(c("-o", "--outDir"), type="character"), - make_option("--q_dbPth", type="character"), - make_option("--l_dbPth", type="character"), + make_option(c("-o", "--outDir"), type = "character"), + make_option("--q_dbPth", type = "character"), + make_option("--l_dbPth", type = "character"), - make_option("--q_dbType", type="character", default=NA), - make_option("--l_dbType", type="character", default=NA), + make_option("--q_dbType", type = "character", default = NA), + make_option("--l_dbType", type = "character", default = NA), - make_option("--q_msp", type="character", default=NA), - make_option("--l_msp", type="character", default=NA), + make_option("--q_msp", type = "character", default = NA), + make_option("--l_msp", type = "character", default = NA), - make_option("--q_defaultDb", action="store_true"), - make_option("--l_defaultDb", action="store_true"), + make_option("--q_defaultDb", action = "store_true"), + make_option("--l_defaultDb", action = "store_true"), - make_option("--q_ppmPrec", type="double"), - make_option("--l_ppmPrec", type="double"), + make_option("--q_ppmPrec", type = "double"), + make_option("--l_ppmPrec", type = "double"), - make_option("--q_ppmProd", type="double"), - make_option("--l_ppmProd", type="double"), + make_option("--q_ppmProd", type = "double"), + make_option("--l_ppmProd", type = "double"), - make_option("--q_raThres", type="double", default=NA), - make_option("--l_raThres", type="double", default=NA), + make_option("--q_raThres", type = "double", default = NA), + make_option("--l_raThres", type = "double", default = NA), - make_option("--q_polarity", type="character", default=NA), - make_option("--l_polarity", type="character", default=NA), + make_option("--q_polarity", type = "character", default = NA), + make_option("--l_polarity", type = "character", default = NA), - make_option("--q_purity", type="double", default=NA), - make_option("--l_purity", type="double", default=NA), + make_option("--q_purity", type = "double", default = NA), + make_option("--l_purity", type = "double", default = NA), - make_option("--q_xcmsGroups", type="character", default=NA), - make_option("--l_xcmsGroups", type="character", default=NA), + make_option("--q_xcmsGroups", type = "character", default = NA), + make_option("--l_xcmsGroups", type = "character", default = NA), - make_option("--q_pids", type="character", default=NA), - make_option("--l_pids", type="character", default=NA), + make_option("--q_pids", type = "character", default = NA), + make_option("--l_pids", type = "character", default = NA), - make_option("--q_rtrangeMin", type="double", default=NA), - make_option("--l_rtrangeMin", type="double", default=NA), + make_option("--q_rtrangeMin", type = "double", default = NA), + make_option("--l_rtrangeMin", type = "double", default = NA), - make_option("--q_rtrangeMax", type="double", default=NA), - make_option("--l_rtrangeMax", type="double", default=NA), + make_option("--q_rtrangeMax", type = "double", default = NA), + make_option("--l_rtrangeMax", type = "double", default = NA), - make_option("--q_accessions", type="character", default=NA), - make_option("--l_accessions", type="character", default=NA), + make_option("--q_accessions", type = "character", default = NA), + make_option("--l_accessions", type = "character", default = NA), - make_option("--q_sources", type="character", default=NA), - make_option("--l_sources", type="character", default=NA), + make_option("--q_sources", type = "character", default = NA), + make_option("--l_sources", type = "character", default = NA), - make_option("--q_sourcesUser", type="character", default=NA), - make_option("--l_sourcesUser", type="character", default=NA), + make_option("--q_sourcesUser", type = "character", default = NA), + make_option("--l_sourcesUser", type = "character", default = NA), - make_option("--q_instrumentTypes", type="character", default=NA), - make_option("--l_instrumentTypes", type="character", default=NA), + make_option("--q_instrumentTypes", type = "character", default = NA), + make_option("--l_instrumentTypes", type = "character", default = NA), - make_option("--q_instrumentTypesUser", type="character", default=NA), - make_option("--l_instrumentTypesUser", type="character", default=NA), + make_option("--q_instrumentTypesUser", type = "character", default = NA), + make_option("--l_instrumentTypesUser", type = "character", default = NA), - make_option("--q_instruments", type="character", default=NA), - make_option("--l_instruments", type="character", default=NA), + make_option("--q_instruments", type = "character", default = NA), + make_option("--l_instruments", type = "character", default = NA), - make_option("--q_spectraTypes", type="character", default=NA), - make_option("--l_spectraTypes", type="character", default=NA), + make_option("--q_spectraTypes", type = "character", default = NA), + make_option("--l_spectraTypes", type = "character", default = NA), - make_option("--q_spectraFilter", action="store_true"), - make_option("--l_spectraFilter", action="store_true"), + make_option("--q_spectraFilter", action = "store_true"), + make_option("--l_spectraFilter", action = "store_true"), - make_option("--usePrecursors", action="store_true"), + make_option("--usePrecursors", action = "store_true"), - make_option("--mzW", type="double"), - make_option("--raW", type="double"), + make_option("--mzW", type = "double"), + make_option("--raW", type = "double"), - make_option("--rttol", type="double", default=NA), + make_option("--rttol", type = "double", default = NA), - make_option("--updateDb", action="store_true"), - make_option("--copyDb", action="store_true"), - make_option("--cores", default=1) + make_option("--updateDb", action = "store_true"), + make_option("--copyDb", action = "store_true"), + make_option("--cores", default = 1) ) # store options -opt<- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) # check if the sqlite databases have any spectra -checkSPeakMeta <- function(dbPth, nme){ - if(is.null(dbPth)){ +checkSPeakMeta <- function(dbPth, nme) { + if (is.null(dbPth)) { return(TRUE) - }else if ((file.exists(dbPth)) & (file.info(dbPth)$size>0)){ + }else if ((file.exists(dbPth)) & (file.info(dbPth)$size > 0)) { con <- DBI::dbConnect(RSQLite::SQLite(), dbPth) - if (DBI::dbExistsTable(con, "s_peak_meta")){ - spm <- DBI::dbGetQuery(con, 'SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1') + if (DBI::dbExistsTable(con, "s_peak_meta")) { + spm <- DBI::dbGetQuery(con, "SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1") return(TRUE) - }else if(DBI::dbExistsTable(con, "library_spectra_meta")){ - spm <- DBI::dbGetQuery(con, 'SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1') + }else if (DBI::dbExistsTable(con, "library_spectra_meta")) { + spm <- DBI::dbGetQuery(con, "SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1") return(TRUE) }else{ - print(paste("No spectra available for ",nme)) + print(paste("No spectra available for ", nme)) return(FALSE) } }else{ @@ -111,110 +115,111 @@ return(FALSE) } - + } -addQueryNameColumn <- function(sm){ - if (is.null(sm$matchedResults) || length(sm$matchedResults)==1 || nrow(sm$matchedResults)==0){ +addQueryNameColumn <- function(sm) { + if (is.null(sm$matchedResults) || length(sm$matchedResults) == 1 || nrow(sm$matchedResults) == 0) { return(sm) } - con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) - if (DBI::dbExistsTable(con, "s_peak_meta")){ - spm <- DBI::dbGetQuery(con, 'SELECT pid, name AS query_entry_name FROM s_peak_meta') - }else if(DBI::dbExistsTable(con, "library_spectra_meta")){ - spm <- DBI::dbGetQuery(con, 'SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta') + con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth) + if (DBI::dbExistsTable(con, "s_peak_meta")) { + spm <- DBI::dbGetQuery(con, "SELECT pid, name AS query_entry_name FROM s_peak_meta") + }else if (DBI::dbExistsTable(con, "library_spectra_meta")) { + spm <- DBI::dbGetQuery(con, "SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta") } print(sm$matchedResults) - if ('pid' %in% colnames(sm$matchedResults)){ - sm$matchedResults <- merge(sm$matchedResults, spm, by.x='pid', by.y='pid') + if ("pid" %in% colnames(sm$matchedResults)) { + sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "pid", by.y = "pid") }else{ - sm$matchedResults <- merge(sm$matchedResults, spm, by.x='qpid', by.y='pid') + sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "qpid", by.y = "pid") } - + print(sm$xcmsMatchedResults) - if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults)==1 || nrow(sm$xcmsMatchedResults)==0){ + if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults) == 1 || nrow(sm$xcmsMatchedResults) == 0) { return(sm) }else{ - if ('pid' %in% colnames(sm$xcmsMatchedResults)){ - sm$xcmsMatchedResults<- merge(sm$xcmsMatchedResults, spm, by.x='pid', by.y='pid') + if ("pid" %in% colnames(sm$xcmsMatchedResults)) { + sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "pid", by.y = "pid") }else{ - sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x='qpid', by.y='pid') + sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "qpid", by.y = "pid") } } - + return(sm) - + } -updateDbF <- function(q_con, l_con){ - message('Adding extra details to database') - q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) - if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){ - l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT * FROM l_s_peak_meta') - colnames(l_s_peak_meta)[1] <- 'pid' +updateDbF <- function(q_con, l_con) { + message("Adding extra details to database") + q_con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth) + if (DBI::dbExistsTable(q_con, "l_s_peak_meta")) { + l_s_peak_meta <- DBI::dbGetQuery(q_con, "SELECT * FROM l_s_peak_meta") + colnames(l_s_peak_meta)[1] <- "pid" } - - l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth) - if (DBI::dbExistsTable(l_con, "s_peaks")){ - l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) - - }else if(DBI::dbExistsTable(l_con, "library_spectra")){ + + l_con <- DBI::dbConnect(RSQLite::SQLite(), l_dbPth) + if (DBI::dbExistsTable(l_con, "s_peaks")) { + l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ","))) + + }else if (DBI::dbExistsTable(l_con, "library_spectra")) { l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra - WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) + WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ","))) }else{ - l_s_peaks = NULL + l_s_peaks <- NULL } - - if (DBI::dbExistsTable(l_con, "source")){ - l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM source') + + if (DBI::dbExistsTable(l_con, "source")) { + l_source <- DBI::dbGetQuery(l_con, "SELECT * FROM source") }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) { - l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM library_spectra_source') + l_source <- DBI::dbGetQuery(l_con, "SELECT * FROM library_spectra_source") }else{ - l_source = NULL + l_source <- NULL } - - if (!is.null(l_s_peaks)){ - DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE) + + if (!is.null(l_s_peaks)) { + DBI::dbWriteTable(q_con, name = "l_s_peaks", value = l_s_peaks, row.names = FALSE, append = TRUE) } - - if (!is.null(l_source)){ - DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE) + + if (!is.null(l_source)) { + DBI::dbWriteTable(q_con, name = "l_source", value = l_source, row.names = FALSE, append = TRUE) } - - + } -extractMultiple <- function(optParam){ - if (!is.na(optParam)){ - param <- trimws(strsplit(optParam, ',')[[1]]) +extractMultiple <- function(optParam) { + if (!is.na(optParam)) { + param <- trimws(strsplit(optParam, ",")[[1]]) param <- param[param != ""] - }else{ + }else { param <- NA } return(param) } -if(!is.null(opt$q_defaultDb)){ - q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package="msPurityData") - q_dbType <- 'sqlite' -}else if (!opt$q_dbType=='local_config'){ +if (!is.null(opt$q_defaultDb)) { + q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData") + q_dbType <- "sqlite" +}else if (!opt$q_dbType == "local_config") { q_dbType <- opt$q_dbType q_dbPth <- opt$q_dbPth } -if(!is.null(opt$l_defaultDb)){ - l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package="msPurityData") - l_dbType <- 'sqlite' -}else if (!opt$l_dbType=='local_config'){ +if (!is.null(opt$l_defaultDb)) { + l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData") + l_dbType <- "sqlite" +}else if (!opt$l_dbType == "local_config") { l_dbType <- opt$l_dbType l_dbPth <- opt$l_dbPth } +q_spectraTypes <- extractMultiple(opt$q_spectraTypes) +l_spectraTypes <- extractMultiple(opt$l_spectraTypes) q_polarity <- extractMultiple(opt$q_polarity) l_polarity <- extractMultiple(opt$l_polarity) @@ -231,115 +236,118 @@ q_sourcesUser <- extractMultiple(opt$q_sourcesUser) l_sourcesUser <- extractMultiple(opt$l_sourcesUser) -q_sources <-c(q_sources, q_sourcesUser) -l_sources <-c(l_sources, l_sourcesUser) +q_sources <- c(q_sources, q_sourcesUser) +l_sources <- c(l_sources, l_sourcesUser) q_instrumentTypes <- extractMultiple(opt$q_instrumentTypes) l_instrumentTypes <- extractMultiple(opt$l_instrumentTypes) -q_instrumentTypes <-c(q_instrumentTypes, q_instrumentTypes) -l_instrumentTypes <-c(l_instrumentTypes, l_instrumentTypes) +q_instrumentTypes <- c(q_instrumentTypes, q_instrumentTypes) +l_instrumentTypes <- c(l_instrumentTypes, l_instrumentTypes) -if(!is.null(opt$l_spectraFilter)){ +if (!is.null(opt$l_spectraFilter)) { l_spectraFilter <- TRUE }else{ l_spectraFilter <- FALSE } -if(!is.null(opt$q_spectraFilter)){ +if (!is.null(opt$q_spectraFilter)) { q_spectraFilter <- TRUE }else{ q_spectraFilter <- FALSE } -if(!is.null(opt$updateDb)){ +if (!is.null(opt$updateDb)) { updateDb <- TRUE }else{ updateDb <- FALSE } -if(!is.null(opt$copyDb)){ +if (!is.null(opt$copyDb)) { copyDb <- TRUE }else{ copyDb <- FALSE } -if(!is.null(opt$l_rtrangeMax)){ +if (!is.null(opt$l_rtrangeMax)) { l_rtrangeMax <- opt$l_rtrangeMax }else{ l_rtrangeMax <- NA } -if(!is.null(opt$q_rtrangeMax)){ +if (!is.null(opt$q_rtrangeMax)) { q_rtrangeMax <- opt$q_rtrangeMax }else{ q_rtrangeMax <- NA } -if(!is.null(opt$l_rtrangeMin)){ +if (!is.null(opt$l_rtrangeMin)) { l_rtrangeMin <- opt$l_rtrangeMin }else{ l_rtrangeMin <- NA } -if(!is.null(opt$q_rtrangeMin)){ +if (!is.null(opt$q_rtrangeMin)) { q_rtrangeMin <- opt$q_rtrangeMin }else{ q_rtrangeMin <- NA } -q_check <- checkSPeakMeta(opt$q_dbPth, 'query') -l_check <- checkSPeakMeta(opt$l_dbPth, 'library') +q_check <- checkSPeakMeta(opt$q_dbPth, "query") +l_check <- checkSPeakMeta(opt$l_dbPth, "library") -if (q_check && l_check){ +if (q_check && l_check) { sm <- msPurity::spectralMatching( q_purity = opt$q_purity, l_purity = opt$l_purity, - + q_ppmProd = opt$q_ppmProd, l_ppmProd = opt$l_ppmProd, - + q_ppmPrec = opt$q_ppmPrec, l_ppmPrec = opt$l_ppmPrec, - + q_raThres = opt$q_raThres, l_raThres = opt$l_raThres, - + q_pol = q_polarity, l_pol = l_polarity, - + + q_spectraFilter = q_spectraFilter, + l_spectraFilter = l_spectraFilter, + q_xcmsGroups = q_xcmsGroups, l_xcmsGroups = l_xcmsGroups, - + q_pids = q_pids, l_pids = l_pids, - + q_sources = q_sources, l_sources = l_sources, - + q_instrumentTypes = q_instrumentTypes, l_instrumentTypes = l_instrumentTypes, - - q_spectraFilter= q_spectraFilter, - l_spectraFilter= l_spectraFilter, - - l_rtrange=c(l_rtrangeMin, l_rtrangeMax), - q_rtrange=c(q_rtrangeMin, q_rtrangeMax), - + + q_spectraTypes = q_spectraTypes, + l_spectraTypes = l_spectraTypes, + + l_rtrange = c(l_rtrangeMin, l_rtrangeMax), + q_rtrange = c(q_rtrangeMin, q_rtrangeMax), + q_accessions = opt$q_accessions, - l_accessions= opt$l_accessions, - + l_accessions = opt$l_accessions, + raW = opt$raW, mzW = opt$mzW, - rttol=opt$rttol, - cores=opt$cores, - - copyDb=copyDb, - updateDb=updateDb, + rttol = opt$rttol, + cores = opt$cores, + + copyDb = copyDb, + updateDb = updateDb, outPth = "db_with_spectral_matching.sqlite", - + q_dbPth = q_dbPth, q_dbType = q_dbType, q_dbName = q_dbName, @@ -347,7 +355,7 @@ q_dbUser = q_dbUser, q_dbPass = q_dbPass, q_dbPort = q_dbPort, - + l_dbPth = l_dbPth, l_dbType = l_dbType, l_dbName = l_dbName, @@ -355,15 +363,15 @@ l_dbUser = l_dbUser, l_dbPass = l_dbPass, l_dbPort = l_dbPort - + ) - + sm <- addQueryNameColumn(sm) # Get name of the query results (and merged with the data frames) - write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) - write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) - - if(updateDb){ + write.table(sm$matchedResults, "matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE) + write.table(sm$xcmsMatchedResults, "xcms_matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE) + + if (updateDb) { updateDbF(q_con, l_con) } }