comparison spectralMatching.R @ 0:35898942bfbb draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc-dirty"
author computational-metabolomics
date Wed, 27 Nov 2019 14:20:07 -0500
parents
children 196f363638ca
comparison
equal deleted inserted replaced
-1:000000000000 0:35898942bfbb
1 library(msPurity)
2 library(msPurityData)
3 library(optparse)
4 print(sessionInfo())
5 # load in library spectra config
6 source_local <- function(fname){ argv <- commandArgs(trailingOnly=FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) }
7 source_local("dbconfig.R")
8
9 option_list <- list(
10 make_option(c("-o", "--outDir"), type="character"),
11 make_option("--q_dbPth", type="character"),
12 make_option("--l_dbPth", type="character"),
13
14 make_option("--q_dbType", type="character", default=NA),
15 make_option("--l_dbType", type="character", default=NA),
16
17 make_option("--q_msp", type="character", default=NA),
18 make_option("--l_msp", type="character", default=NA),
19
20 make_option("--q_defaultDb", action="store_true"),
21 make_option("--l_defaultDb", action="store_true"),
22
23 make_option("--q_ppmPrec", type="double"),
24 make_option("--l_ppmPrec", type="double"),
25
26 make_option("--q_ppmProd", type="double"),
27 make_option("--l_ppmProd", type="double"),
28
29 make_option("--q_raThres", type="double", default=NA),
30 make_option("--l_raThres", type="double", default=NA),
31
32 make_option("--q_polarity", type="character", default=NA),
33 make_option("--l_polarity", type="character", default=NA),
34
35 make_option("--q_purity", type="double", default=NA),
36 make_option("--l_purity", type="double", default=NA),
37
38 make_option("--q_xcmsGroups", type="character", default=NA),
39 make_option("--l_xcmsGroups", type="character", default=NA),
40
41 make_option("--q_pids", type="character", default=NA),
42 make_option("--l_pids", type="character", default=NA),
43
44 make_option("--q_rtrangeMin", type="double", default=NA),
45 make_option("--l_rtrangeMin", type="double", default=NA),
46
47 make_option("--q_rtrangeMax", type="double", default=NA),
48 make_option("--l_rtrangeMax", type="double", default=NA),
49
50 make_option("--q_accessions", type="character", default=NA),
51 make_option("--l_accessions", type="character", default=NA),
52
53 make_option("--q_sources", type="character", default=NA),
54 make_option("--l_sources", type="character", default=NA),
55
56 make_option("--q_sourcesUser", type="character", default=NA),
57 make_option("--l_sourcesUser", type="character", default=NA),
58
59 make_option("--q_instrumentTypes", type="character", default=NA),
60 make_option("--l_instrumentTypes", type="character", default=NA),
61
62 make_option("--q_instrumentTypesUser", type="character", default=NA),
63 make_option("--l_instrumentTypesUser", type="character", default=NA),
64
65 make_option("--q_instruments", type="character", default=NA),
66 make_option("--l_instruments", type="character", default=NA),
67
68 make_option("--q_spectraTypes", type="character", default=NA),
69 make_option("--l_spectraTypes", type="character", default=NA),
70
71 make_option("--q_spectraFilter", action="store_true"),
72 make_option("--l_spectraFilter", action="store_true"),
73
74 make_option("--usePrecursors", action="store_true"),
75
76 make_option("--mzW", type="double"),
77 make_option("--raW", type="double"),
78
79 make_option("--rttol", type="double", default=NA),
80
81 make_option("--updateDb", action="store_true"),
82 make_option("--copyDb", action="store_true"),
83 make_option("--cores", default=1)
84
85
86 )
87
88 # store options
89 opt<- parse_args(OptionParser(option_list=option_list))
90
91 print(opt)
92
93 # check if the sqlite databases have any spectra
94 checkSPeakMeta <- function(dbPth, nme){
95 if(is.null(dbPth)){
96 return(TRUE)
97 }else if ((file.exists(dbPth)) & (file.info(dbPth)$size>0)){
98 con <- DBI::dbConnect(RSQLite::SQLite(), dbPth)
99 if (DBI::dbExistsTable(con, "s_peak_meta")){
100 spm <- DBI::dbGetQuery(con, 'SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1')
101 return(TRUE)
102 }else if(DBI::dbExistsTable(con, "library_spectra_meta")){
103 spm <- DBI::dbGetQuery(con, 'SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1')
104 return(TRUE)
105 }else{
106 print(paste("No spectra available for ",nme))
107 return(FALSE)
108 }
109 }else{
110 print(paste("file empty or does not exist for", nme))
111 return(FALSE)
112 }
113
114
115 }
116
117
118 addQueryNameColumn <- function(sm){
119 if (is.null(sm$matchedResults) || length(sm$matchedResults)==1 || nrow(sm$matchedResults)==0){
120 return(sm)
121 }
122
123 con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth)
124 if (DBI::dbExistsTable(con, "s_peak_meta")){
125 spm <- DBI::dbGetQuery(con, 'SELECT pid, name AS query_entry_name FROM s_peak_meta')
126 }else if(DBI::dbExistsTable(con, "library_spectra_meta")){
127 spm <- DBI::dbGetQuery(con, 'SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta')
128 }
129 print(sm$matchedResults)
130 if ('pid' %in% colnames(sm$matchedResults)){
131 sm$matchedResults <- merge(sm$matchedResults, spm, by.x='pid', by.y='pid')
132 }else{
133 sm$matchedResults <- merge(sm$matchedResults, spm, by.x='qpid', by.y='pid')
134 }
135
136 print(sm$xcmsMatchedResults)
137 if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults)==1 || nrow(sm$xcmsMatchedResults)==0){
138 return(sm)
139 }else{
140 if ('pid' %in% colnames(sm$xcmsMatchedResults)){
141 sm$xcmsMatchedResults<- merge(sm$xcmsMatchedResults, spm, by.x='pid', by.y='pid')
142 }else{
143 sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x='qpid', by.y='pid')
144 }
145 }
146
147 return(sm)
148
149 }
150
151
152 updateDbF <- function(q_con, l_con){
153 message('Adding extra details to database')
154 q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth)
155 if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){
156 l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT * FROM l_s_peak_meta')
157 colnames(l_s_peak_meta)[1] <- 'pid'
158 }
159
160 l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth)
161 if (DBI::dbExistsTable(l_con, "s_peaks")){
162 l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=',')))
163
164 }else if(DBI::dbExistsTable(l_con, "library_spectra")){
165 l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra
166 WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=',')))
167 }else{
168 l_s_peaks = NULL
169 }
170
171 if (DBI::dbExistsTable(l_con, "source")){
172 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM source')
173 }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) {
174 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM library_spectra_source')
175 }else{
176 l_source = NULL
177 }
178
179 if (!is.null(l_s_peaks)){
180 DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE)
181 }
182
183 if (!is.null(l_source)){
184 DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE)
185 }
186
187
188 }
189
190
191 extractMultiple <- function(optParam){
192 if (!is.na(optParam)){
193 param <- trimws(strsplit(optParam, ',')[[1]])
194 param <- param[param != ""]
195 }else{
196 param <- NA
197 }
198 return(param)
199
200 }
201
202 if(!is.null(opt$q_defaultDb)){
203 q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package="msPurityData")
204 q_dbType <- 'sqlite'
205 }else if (!opt$q_dbType=='local_config'){
206 q_dbType <- opt$q_dbType
207 q_dbPth <- opt$q_dbPth
208 }
209
210 if(!is.null(opt$l_defaultDb)){
211 l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package="msPurityData")
212 l_dbType <- 'sqlite'
213 }else if (!opt$l_dbType=='local_config'){
214 l_dbType <- opt$l_dbType
215 l_dbPth <- opt$l_dbPth
216 }
217
218
219 q_polarity <- extractMultiple(opt$q_polarity)
220 l_polarity <- extractMultiple(opt$l_polarity)
221
222 q_xcmsGroups <- extractMultiple(opt$q_xcmsGroups)
223 l_xcmsGroups <- extractMultiple(opt$l_xcmsGroups)
224
225 q_pids <- extractMultiple(opt$q_pids)
226 l_pids <- extractMultiple(opt$l_pids)
227
228 q_sources <- extractMultiple(opt$q_sources)
229 l_sources <- extractMultiple(opt$l_sources)
230
231 q_sourcesUser <- extractMultiple(opt$q_sourcesUser)
232 l_sourcesUser <- extractMultiple(opt$l_sourcesUser)
233
234 q_sources <-c(q_sources, q_sourcesUser)
235 l_sources <-c(l_sources, l_sourcesUser)
236
237 q_instrumentTypes <- extractMultiple(opt$q_instrumentTypes)
238 l_instrumentTypes <- extractMultiple(opt$l_instrumentTypes)
239
240 q_instrumentTypes <-c(q_instrumentTypes, q_instrumentTypes)
241 l_instrumentTypes <-c(l_instrumentTypes, l_instrumentTypes)
242
243
244 if(!is.null(opt$l_spectraFilter)){
245 l_spectraFilter <- TRUE
246 }else{
247 l_spectraFilter <- FALSE
248 }
249
250 if(!is.null(opt$q_spectraFilter)){
251 q_spectraFilter <- TRUE
252 }else{
253 q_spectraFilter <- FALSE
254 }
255
256 if(!is.null(opt$updateDb)){
257 updateDb <- TRUE
258 }else{
259 updateDb <- FALSE
260 }
261
262 if(!is.null(opt$copyDb)){
263 copyDb <- TRUE
264 }else{
265 copyDb <- FALSE
266 }
267
268 if(!is.null(opt$l_rtrangeMax)){
269 l_rtrangeMax <- opt$l_rtrangeMax
270 }else{
271 l_rtrangeMax <- NA
272 }
273
274 if(!is.null(opt$q_rtrangeMax)){
275 q_rtrangeMax <- opt$q_rtrangeMax
276 }else{
277 q_rtrangeMax <- NA
278 }
279
280 if(!is.null(opt$l_rtrangeMin)){
281 l_rtrangeMin <- opt$l_rtrangeMin
282 }else{
283 l_rtrangeMin <- NA
284 }
285
286 if(!is.null(opt$q_rtrangeMin)){
287 q_rtrangeMin <- opt$q_rtrangeMin
288 }else{
289 q_rtrangeMin <- NA
290 }
291
292 q_check <- checkSPeakMeta(opt$q_dbPth, 'query')
293 l_check <- checkSPeakMeta(opt$l_dbPth, 'library')
294
295 if (q_check && l_check){
296 sm <- msPurity::spectralMatching(
297 q_purity = opt$q_purity,
298 l_purity = opt$l_purity,
299
300 q_ppmProd = opt$q_ppmProd,
301 l_ppmProd = opt$l_ppmProd,
302
303 q_ppmPrec = opt$q_ppmPrec,
304 l_ppmPrec = opt$l_ppmPrec,
305
306 q_raThres = opt$q_raThres,
307 l_raThres = opt$l_raThres,
308
309 q_pol = q_polarity,
310 l_pol = l_polarity,
311
312 q_xcmsGroups = q_xcmsGroups,
313 l_xcmsGroups = l_xcmsGroups,
314
315 q_pids = q_pids,
316 l_pids = l_pids,
317
318 q_sources = q_sources,
319 l_sources = l_sources,
320
321 q_instrumentTypes = q_instrumentTypes,
322 l_instrumentTypes = l_instrumentTypes,
323
324 q_spectraFilter= q_spectraFilter,
325 l_spectraFilter= l_spectraFilter,
326
327 l_rtrange=c(l_rtrangeMin, l_rtrangeMax),
328 q_rtrange=c(q_rtrangeMin, q_rtrangeMax),
329
330 q_accessions = opt$q_accessions,
331 l_accessions= opt$l_accessions,
332
333 raW = opt$raW,
334 mzW = opt$mzW,
335 rttol=opt$rttol,
336 cores=opt$cores,
337
338 copyDb=copyDb,
339 updateDb=updateDb,
340 outPth = "db_with_spectral_matching.sqlite",
341
342 q_dbPth = q_dbPth,
343 q_dbType = q_dbType,
344 q_dbName = q_dbName,
345 q_dbHost = q_dbHost,
346 q_dbUser = q_dbUser,
347 q_dbPass = q_dbPass,
348 q_dbPort = q_dbPort,
349
350 l_dbPth = l_dbPth,
351 l_dbType = l_dbType,
352 l_dbName = l_dbName,
353 l_dbHost = l_dbHost,
354 l_dbUser = l_dbUser,
355 l_dbPass = l_dbPass,
356 l_dbPort = l_dbPort
357
358 )
359
360 sm <- addQueryNameColumn(sm)
361 # Get name of the query results (and merged with the data frames)
362 write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE)
363 write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE)
364
365 if(updateDb){
366 updateDbF(q_con, l_con)
367 }
368 }