diff spectralMatching.R @ 6:2f71b3495221 draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author computational-metabolomics
date Thu, 04 Mar 2021 12:27:21 +0000
parents 3d36a391612c
children
line wrap: on
line diff
--- a/spectralMatching.R	Fri Nov 13 10:05:38 2020 +0000
+++ b/spectralMatching.R	Thu Mar 04 12:27:21 2021 +0000
@@ -3,107 +3,111 @@
 library(optparse)
 print(sessionInfo())
 # load in library spectra config
-source_local <- function(fname){ argv <- commandArgs(trailingOnly=FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) }
+source_local <- function(fname) {
+  argv <- commandArgs(trailingOnly = FALSE)
+  base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
+  source(paste(base_dir, fname, sep = "/"))
+}
 source_local("dbconfig.R")
 
 option_list <- list(
-  make_option(c("-o", "--outDir"), type="character"),
-  make_option("--q_dbPth", type="character"),
-  make_option("--l_dbPth", type="character"),
+  make_option(c("-o", "--outDir"), type = "character"),
+  make_option("--q_dbPth", type = "character"),
+  make_option("--l_dbPth", type = "character"),
 
-  make_option("--q_dbType", type="character", default=NA),
-  make_option("--l_dbType", type="character", default=NA),
+  make_option("--q_dbType", type = "character", default = NA),
+  make_option("--l_dbType", type = "character", default = NA),
 
-  make_option("--q_msp", type="character", default=NA),
-  make_option("--l_msp", type="character", default=NA),
+  make_option("--q_msp", type = "character", default = NA),
+  make_option("--l_msp", type = "character", default = NA),
 
-  make_option("--q_defaultDb", action="store_true"),
-  make_option("--l_defaultDb", action="store_true"),
+  make_option("--q_defaultDb", action = "store_true"),
+  make_option("--l_defaultDb", action = "store_true"),
 
-  make_option("--q_ppmPrec", type="double"),
-  make_option("--l_ppmPrec", type="double"),
+  make_option("--q_ppmPrec", type = "double"),
+  make_option("--l_ppmPrec", type = "double"),
 
-  make_option("--q_ppmProd", type="double"),
-  make_option("--l_ppmProd", type="double"),
+  make_option("--q_ppmProd", type = "double"),
+  make_option("--l_ppmProd", type = "double"),
 
-  make_option("--q_raThres", type="double", default=NA),
-  make_option("--l_raThres", type="double", default=NA),
+  make_option("--q_raThres", type = "double", default = NA),
+  make_option("--l_raThres", type = "double", default = NA),
 
-  make_option("--q_polarity", type="character", default=NA),
-  make_option("--l_polarity", type="character", default=NA),
+  make_option("--q_polarity", type = "character", default = NA),
+  make_option("--l_polarity", type = "character", default = NA),
 
-  make_option("--q_purity", type="double", default=NA),
-  make_option("--l_purity", type="double", default=NA),
+  make_option("--q_purity", type = "double", default = NA),
+  make_option("--l_purity", type = "double", default = NA),
 
-  make_option("--q_xcmsGroups", type="character", default=NA),
-  make_option("--l_xcmsGroups", type="character", default=NA),
+  make_option("--q_xcmsGroups", type = "character", default = NA),
+  make_option("--l_xcmsGroups", type = "character", default = NA),
 
-  make_option("--q_pids", type="character", default=NA),
-  make_option("--l_pids", type="character", default=NA),
+  make_option("--q_pids", type = "character", default = NA),
+  make_option("--l_pids", type = "character", default = NA),
 
-  make_option("--q_rtrangeMin", type="double", default=NA),
-  make_option("--l_rtrangeMin", type="double", default=NA),
+  make_option("--q_rtrangeMin", type = "double", default = NA),
+  make_option("--l_rtrangeMin", type = "double", default = NA),
 
-  make_option("--q_rtrangeMax", type="double", default=NA),
-  make_option("--l_rtrangeMax", type="double", default=NA),
+  make_option("--q_rtrangeMax", type = "double", default = NA),
+  make_option("--l_rtrangeMax", type = "double", default = NA),
 
-  make_option("--q_accessions", type="character", default=NA),
-  make_option("--l_accessions", type="character", default=NA),
+  make_option("--q_accessions", type = "character", default = NA),
+  make_option("--l_accessions", type = "character", default = NA),
 
-  make_option("--q_sources", type="character", default=NA),
-  make_option("--l_sources", type="character", default=NA),
+  make_option("--q_sources", type = "character", default = NA),
+  make_option("--l_sources", type = "character", default = NA),
 
-  make_option("--q_sourcesUser", type="character", default=NA),
-  make_option("--l_sourcesUser", type="character", default=NA),
+  make_option("--q_sourcesUser", type = "character", default = NA),
+  make_option("--l_sourcesUser", type = "character", default = NA),
 
-  make_option("--q_instrumentTypes", type="character", default=NA),
-  make_option("--l_instrumentTypes", type="character", default=NA),
+  make_option("--q_instrumentTypes", type = "character", default = NA),
+  make_option("--l_instrumentTypes", type = "character", default = NA),
 
-  make_option("--q_instrumentTypesUser", type="character", default=NA),
-  make_option("--l_instrumentTypesUser", type="character", default=NA),
+  make_option("--q_instrumentTypesUser", type = "character", default = NA),
+  make_option("--l_instrumentTypesUser", type = "character", default = NA),
 
-  make_option("--q_instruments", type="character", default=NA),
-  make_option("--l_instruments", type="character", default=NA),
+  make_option("--q_instruments", type = "character", default = NA),
+  make_option("--l_instruments", type = "character", default = NA),
 
-  make_option("--q_spectraTypes", type="character", default=NA),
-  make_option("--l_spectraTypes", type="character", default=NA),
+  make_option("--q_spectraTypes", type = "character", default = NA),
+  make_option("--l_spectraTypes", type = "character", default = NA),
 
-  make_option("--q_spectraFilter", action="store_true"),
-  make_option("--l_spectraFilter", action="store_true"),
+  make_option("--q_spectraFilter", action = "store_true"),
+  make_option("--l_spectraFilter", action = "store_true"),
 
-  make_option("--usePrecursors", action="store_true"),
+  make_option("--usePrecursors", action = "store_true"),
 
-  make_option("--mzW", type="double"),
-  make_option("--raW", type="double"),
+  make_option("--mzW", type = "double"),
+  make_option("--raW", type = "double"),
 
-  make_option("--rttol", type="double", default=NA),
+  make_option("--rttol", type = "double", default = NA),
 
-  make_option("--updateDb", action="store_true"),
-  make_option("--copyDb", action="store_true"),
-  make_option("--cores", default=1)
+  make_option("--updateDb", action = "store_true"),
+  make_option("--copyDb", action = "store_true"),
+  make_option("--cores", default = 1)
 
 
 )
 
 # store options
-opt<- parse_args(OptionParser(option_list=option_list))
+opt <- parse_args(OptionParser(option_list = option_list))
 
 print(opt)
 
 # check if the sqlite databases have any spectra
-checkSPeakMeta <- function(dbPth, nme){
-    if(is.null(dbPth)){
+checkSPeakMeta <- function(dbPth, nme) {
+    if (is.null(dbPth)) {
         return(TRUE)
-    }else if ((file.exists(dbPth)) & (file.info(dbPth)$size>0)){
+    }else if ((file.exists(dbPth)) & (file.info(dbPth)$size > 0)) {
         con <- DBI::dbConnect(RSQLite::SQLite(), dbPth)
-        if (DBI::dbExistsTable(con, "s_peak_meta")){
-            spm <- DBI::dbGetQuery(con, 'SELECT  * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1')
+        if (DBI::dbExistsTable(con, "s_peak_meta")) {
+            spm <- DBI::dbGetQuery(con, "SELECT  * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1")
             return(TRUE)
-        }else if(DBI::dbExistsTable(con, "library_spectra_meta")){
-            spm <- DBI::dbGetQuery(con, 'SELECT  * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1')
+        }else if (DBI::dbExistsTable(con, "library_spectra_meta")) {
+            spm <- DBI::dbGetQuery(con, "SELECT  * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1")
             return(TRUE)
         }else{
-            print(paste("No spectra available for ",nme))
+            print(paste("No spectra available for ", nme))
             return(FALSE)
         }
     }else{
@@ -111,110 +115,111 @@
         return(FALSE)
     }
 
-        
+
 }
 
 
-addQueryNameColumn <- function(sm){
-    if (is.null(sm$matchedResults) || length(sm$matchedResults)==1 || nrow(sm$matchedResults)==0){
+addQueryNameColumn <- function(sm) {
+    if (is.null(sm$matchedResults) || length(sm$matchedResults) == 1 || nrow(sm$matchedResults) == 0) {
         return(sm)
     }
 
-    con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth)
-    if (DBI::dbExistsTable(con, "s_peak_meta")){
-        spm <- DBI::dbGetQuery(con, 'SELECT  pid, name AS query_entry_name FROM s_peak_meta')
-    }else if(DBI::dbExistsTable(con, "library_spectra_meta")){
-        spm <- DBI::dbGetQuery(con, 'SELECT  id AS pid, name  AS query_entry_name FROM library_spectra_meta')
+    con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth)
+    if (DBI::dbExistsTable(con, "s_peak_meta")) {
+        spm <- DBI::dbGetQuery(con, "SELECT  pid, name AS query_entry_name FROM s_peak_meta")
+    }else if (DBI::dbExistsTable(con, "library_spectra_meta")) {
+        spm <- DBI::dbGetQuery(con, "SELECT  id AS pid, name  AS query_entry_name FROM library_spectra_meta")
     }
     print(sm$matchedResults)
-    if ('pid' %in% colnames(sm$matchedResults)){
-        sm$matchedResults <- merge(sm$matchedResults, spm, by.x='pid', by.y='pid')    
+    if ("pid" %in% colnames(sm$matchedResults)) {
+        sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "pid", by.y = "pid")
     }else{
-        sm$matchedResults <- merge(sm$matchedResults, spm, by.x='qpid', by.y='pid')
+        sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "qpid", by.y = "pid")
     }
-    
+
     print(sm$xcmsMatchedResults)
-    if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults)==1 || nrow(sm$xcmsMatchedResults)==0){
+    if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults) == 1 || nrow(sm$xcmsMatchedResults) == 0) {
         return(sm)
     }else{
-        if ('pid' %in% colnames(sm$xcmsMatchedResults)){
-            sm$xcmsMatchedResults<- merge(sm$xcmsMatchedResults, spm, by.x='pid', by.y='pid')    
+        if ("pid" %in% colnames(sm$xcmsMatchedResults)) {
+            sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "pid", by.y = "pid")
         }else{
-            sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x='qpid', by.y='pid')
+            sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "qpid", by.y = "pid")
         }
     }
-    
+
     return(sm)
-    
+
 }
 
 
-updateDbF <- function(q_con, l_con){
-    message('Adding extra details to database')
-    q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth)
-    if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){
-        l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT  * FROM l_s_peak_meta')
-        colnames(l_s_peak_meta)[1] <- 'pid'
+updateDbF <- function(q_con, l_con) {
+    message("Adding extra details to database")
+    q_con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth)
+    if (DBI::dbExistsTable(q_con, "l_s_peak_meta")) {
+        l_s_peak_meta <- DBI::dbGetQuery(q_con, "SELECT  * FROM l_s_peak_meta")
+        colnames(l_s_peak_meta)[1] <- "pid"
     }
-    
-    l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth)
-    if (DBI::dbExistsTable(l_con, "s_peaks")){
-        l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT  * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=',')))
-        
-    }else if(DBI::dbExistsTable(l_con, "library_spectra")){
+
+    l_con <- DBI::dbConnect(RSQLite::SQLite(), l_dbPth)
+    if (DBI::dbExistsTable(l_con, "s_peaks")) {
+        l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT  * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ",")))
+
+    }else if (DBI::dbExistsTable(l_con, "library_spectra")) {
         l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT  * FROM library_spectra
-                                                WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=',')))
+                                                WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ",")))
     }else{
-        l_s_peaks = NULL
+        l_s_peaks <- NULL
     }
-    
-    if (DBI::dbExistsTable(l_con, "source")){
-        l_source <- DBI::dbGetQuery(l_con, 'SELECT  * FROM source')
+
+    if (DBI::dbExistsTable(l_con, "source")) {
+        l_source <- DBI::dbGetQuery(l_con, "SELECT  * FROM source")
     }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) {
-        l_source <- DBI::dbGetQuery(l_con, 'SELECT  * FROM library_spectra_source')
+        l_source <- DBI::dbGetQuery(l_con, "SELECT  * FROM library_spectra_source")
     }else{
-        l_source = NULL
+        l_source <- NULL
     }
-    
-    if (!is.null(l_s_peaks)){
-        DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE)
+
+    if (!is.null(l_s_peaks)) {
+        DBI::dbWriteTable(q_con, name = "l_s_peaks", value = l_s_peaks, row.names = FALSE, append = TRUE)
     }
-    
-    if (!is.null(l_source)){
-        DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE)
+
+    if (!is.null(l_source)) {
+        DBI::dbWriteTable(q_con, name = "l_source", value = l_source, row.names = FALSE, append = TRUE)
     }
-    
-    
+
 }
 
 
-extractMultiple <- function(optParam){
-  if (!is.na(optParam)){
-     param <- trimws(strsplit(optParam, ',')[[1]])
+extractMultiple <- function(optParam) {
+  if (!is.na(optParam)) {
+     param <- trimws(strsplit(optParam, ",")[[1]])
      param <- param[param != ""]
-  }else{
+  }else {
      param <- NA
   }
   return(param)
 
 }
 
-if(!is.null(opt$q_defaultDb)){
-  q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package="msPurityData")
-  q_dbType <- 'sqlite'
-}else if (!opt$q_dbType=='local_config'){
+if (!is.null(opt$q_defaultDb)) {
+  q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData")
+  q_dbType <- "sqlite"
+}else if (!opt$q_dbType == "local_config") {
   q_dbType <- opt$q_dbType
   q_dbPth <- opt$q_dbPth
 }
 
-if(!is.null(opt$l_defaultDb)){
-  l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package="msPurityData")
-  l_dbType <- 'sqlite'
-}else if (!opt$l_dbType=='local_config'){
+if (!is.null(opt$l_defaultDb)) {
+  l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData")
+  l_dbType <- "sqlite"
+}else if (!opt$l_dbType == "local_config") {
   l_dbType <- opt$l_dbType
   l_dbPth <- opt$l_dbPth
 }
 
+q_spectraTypes <- extractMultiple(opt$q_spectraTypes)
+l_spectraTypes <- extractMultiple(opt$l_spectraTypes)
 
 q_polarity <- extractMultiple(opt$q_polarity)
 l_polarity <- extractMultiple(opt$l_polarity)
@@ -231,115 +236,118 @@
 q_sourcesUser <- extractMultiple(opt$q_sourcesUser)
 l_sourcesUser <- extractMultiple(opt$l_sourcesUser)
 
-q_sources <-c(q_sources, q_sourcesUser)
-l_sources <-c(l_sources, l_sourcesUser)
+q_sources <- c(q_sources, q_sourcesUser)
+l_sources <- c(l_sources, l_sourcesUser)
 
 q_instrumentTypes <- extractMultiple(opt$q_instrumentTypes)
 l_instrumentTypes <- extractMultiple(opt$l_instrumentTypes)
 
-q_instrumentTypes <-c(q_instrumentTypes, q_instrumentTypes)
-l_instrumentTypes <-c(l_instrumentTypes, l_instrumentTypes)
+q_instrumentTypes <- c(q_instrumentTypes, q_instrumentTypes)
+l_instrumentTypes <- c(l_instrumentTypes, l_instrumentTypes)
 
 
-if(!is.null(opt$l_spectraFilter)){
+if (!is.null(opt$l_spectraFilter)) {
   l_spectraFilter <- TRUE
 }else{
   l_spectraFilter <- FALSE
 }
 
-if(!is.null(opt$q_spectraFilter)){
+if (!is.null(opt$q_spectraFilter)) {
   q_spectraFilter <- TRUE
 }else{
   q_spectraFilter <- FALSE
 }
 
-if(!is.null(opt$updateDb)){
+if (!is.null(opt$updateDb)) {
   updateDb <- TRUE
 }else{
   updateDb <- FALSE
 }
 
-if(!is.null(opt$copyDb)){
+if (!is.null(opt$copyDb)) {
   copyDb <- TRUE
 }else{
   copyDb <- FALSE
 }
 
-if(!is.null(opt$l_rtrangeMax)){
+if (!is.null(opt$l_rtrangeMax)) {
   l_rtrangeMax <- opt$l_rtrangeMax
 }else{
   l_rtrangeMax <- NA
 }
 
-if(!is.null(opt$q_rtrangeMax)){
+if (!is.null(opt$q_rtrangeMax)) {
   q_rtrangeMax <- opt$q_rtrangeMax
 }else{
   q_rtrangeMax <- NA
 }
 
-if(!is.null(opt$l_rtrangeMin)){
+if (!is.null(opt$l_rtrangeMin)) {
   l_rtrangeMin <- opt$l_rtrangeMin
 }else{
   l_rtrangeMin <- NA
 }
 
-if(!is.null(opt$q_rtrangeMin)){
+if (!is.null(opt$q_rtrangeMin)) {
   q_rtrangeMin <- opt$q_rtrangeMin
 }else{
   q_rtrangeMin <- NA
 }
 
-q_check <- checkSPeakMeta(opt$q_dbPth, 'query')
-l_check <- checkSPeakMeta(opt$l_dbPth, 'library')
+q_check <- checkSPeakMeta(opt$q_dbPth, "query")
+l_check <- checkSPeakMeta(opt$l_dbPth, "library")
 
 
-if (q_check && l_check){
+if (q_check && l_check) {
     sm <- msPurity::spectralMatching(
         q_purity =  opt$q_purity,
         l_purity =  opt$l_purity,
-        
+
         q_ppmProd =  opt$q_ppmProd,
         l_ppmProd =  opt$l_ppmProd,
-        
+
         q_ppmPrec =  opt$q_ppmPrec,
         l_ppmPrec =  opt$l_ppmPrec,
-        
+
         q_raThres =  opt$q_raThres,
         l_raThres =  opt$l_raThres,
-        
+
         q_pol =  q_polarity,
         l_pol =  l_polarity,
-        
+
+        q_spectraFilter = q_spectraFilter,
+        l_spectraFilter = l_spectraFilter,
+
         q_xcmsGroups = q_xcmsGroups,
         l_xcmsGroups = l_xcmsGroups,
-        
+
         q_pids = q_pids,
         l_pids = l_pids,
-        
+
         q_sources = q_sources,
         l_sources = l_sources,
-        
+
         q_instrumentTypes = q_instrumentTypes,
         l_instrumentTypes = l_instrumentTypes,
-        
-        q_spectraFilter= q_spectraFilter,
-        l_spectraFilter= l_spectraFilter,
-        
-        l_rtrange=c(l_rtrangeMin, l_rtrangeMax),
-        q_rtrange=c(q_rtrangeMin, q_rtrangeMax),
-        
+
+        q_spectraTypes = q_spectraTypes,
+        l_spectraTypes = l_spectraTypes,
+
+        l_rtrange = c(l_rtrangeMin, l_rtrangeMax),
+        q_rtrange = c(q_rtrangeMin, q_rtrangeMax),
+
         q_accessions = opt$q_accessions,
-        l_accessions= opt$l_accessions,
-        
+        l_accessions = opt$l_accessions,
+
         raW = opt$raW,
         mzW = opt$mzW,
-        rttol=opt$rttol,
-        cores=opt$cores,
-        
-        copyDb=copyDb,
-        updateDb=updateDb,
+        rttol = opt$rttol,
+        cores = opt$cores,
+
+        copyDb = copyDb,
+        updateDb = updateDb,
         outPth = "db_with_spectral_matching.sqlite",
-        
+
         q_dbPth = q_dbPth,
         q_dbType = q_dbType,
         q_dbName = q_dbName,
@@ -347,7 +355,7 @@
         q_dbUser = q_dbUser,
         q_dbPass = q_dbPass,
         q_dbPort = q_dbPort,
-        
+
         l_dbPth = l_dbPth,
         l_dbType = l_dbType,
         l_dbName = l_dbName,
@@ -355,15 +363,15 @@
         l_dbUser = l_dbUser,
         l_dbPass = l_dbPass,
         l_dbPort = l_dbPort
-        
+
     )
-    
+
     sm <- addQueryNameColumn(sm)
     # Get name of the query results (and merged with the data frames)
-    write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE)
-    write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE)
-    
-    if(updateDb){
+    write.table(sm$matchedResults, "matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE)
+    write.table(sm$xcmsMatchedResults, "xcms_matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE)
+
+    if (updateDb) {
         updateDbF(q_con, l_con)
     }
 }