comparison spectralMatching.xml @ 0:5ff9d40c7a42 draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc-dirty"
author computational-metabolomics
date Wed, 27 Nov 2019 12:31:31 -0500
parents
children aee10d29e82c
comparison
equal deleted inserted replaced
-1:000000000000 0:5ff9d40c7a42
1 <tool id="mspurity_spectralmatching" name="msPurity.spectralMatching" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
2 <description>
3 Perform spectral matching to MS/MS spectral libraries
4 </description>
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8 <expand macro="requirements"/>
9 <command detect_errors="exit_code"><![CDATA[
## Builds the Rscript call for spectralMatching.R. Every parameter substitution is
## single-quoted so that dataset paths and free-text values reach the script as one
## shell word each (previously only some of the l_*/…User values were quoted).
## NOTE(review): the query branch tests for 'sqlite' and the library branch for
## 'userdb_sqlite', while the tests below select 'userdb' - confirm the option values
## declared by the sm_input macro in macros.xml.
10 Rscript '$__tool_directory__/spectralMatching.R'
11 --outDir=.
12 --cores=\${GALAXY_SLOTS:-4}
13
14 #if $Query.q_dbPth_con.q_dbPth_select == 'msPurityData'
15 --q_defaultDb
16 #else if $Query.q_dbPth_con.q_dbPth_select == 'sqlite'
17 --q_dbPth='$Query.q_dbPth_con.q_dbPth'
18 #end if
19
20 #if $Library.l_dbPth_con.l_dbPth_select == 'msPurityData'
21 --l_defaultDb
22 #else if $Library.l_dbPth_con.l_dbPth_select == 'userdb_sqlite'
23 --l_dbPth='$Library.l_dbPth_con.l_dbPth'
24 #end if
25
26 --l_dbType='$Library.l_dbPth_con.l_dbPth_select'
27 --q_dbType='$Query.q_dbPth_con.q_dbPth_select'
28
29
30 --q_ppmPrec='$Query.q_filters.q_ppmPrec'
31 --l_ppmPrec='$Library.l_filters.l_ppmPrec'
32
33 --q_ppmProd='$Query.q_filters.q_ppmProd'
34 --l_ppmProd='$Library.l_filters.l_ppmProd'
35
36
37 #if $Query.q_filters.q_raThres_cond.q_raThres_bool
38 --q_raThres='$Query.q_filters.q_raThres_cond.q_raThres'
39 #end if
40
41 #if $Library.l_filters.l_raThres_cond.l_raThres_bool
42 --l_raThres='$Library.l_filters.l_raThres_cond.l_raThres'
43 #end if
44
45 #if $Query.q_filters.q_polarity_cond.q_polarity_bool
46 --q_polarity='$Query.q_filters.q_polarity_cond.q_polarity'
47 #end if
48
49 #if $Library.l_filters.l_polarity_cond.l_polarity_bool
50 --l_polarity='$Library.l_filters.l_polarity_cond.l_polarity'
51 #end if
52
53 #if $Query.q_filters.q_purity_cond.q_purity_bool
54 --q_purity='$Query.q_filters.q_purity_cond.q_purity'
55 #end if
56
57 #if $Library.l_filters.l_purity_cond.l_purity_bool
58 --l_purity='$Library.l_filters.l_purity_cond.l_purity'
59 #end if
60
61 #if $Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups_bool
62 --q_xcmsGroups='$Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups'
63 #end if
64
65 #if $Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups_bool
66 --l_xcmsGroups='$Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups'
67 #end if
68
69 #if $Query.q_filters.q_pids_cond.q_pids_bool
70 --q_pids='$Query.q_filters.q_pids_cond.q_pids'
71 #end if
72
73 #if $Library.l_filters.l_pids_cond.l_pids_bool
74 --l_pids='$Library.l_filters.l_pids_cond.l_pids'
75 #end if
76
77 #if $Query.q_filters.q_rtrange_cond.q_rtrange_bool
78 --q_rtrangeMin='$Query.q_filters.q_rtrange_cond.q_rtrangeMin'
79 --q_rtrangeMax='$Query.q_filters.q_rtrange_cond.q_rtrangeMax'
80 #end if
81
82 #if $Library.l_filters.l_rtrange_cond.l_rtrange_bool
83 --l_rtrangeMin='$Library.l_filters.l_rtrange_cond.l_rtrangeMin'
84 --l_rtrangeMax='$Library.l_filters.l_rtrange_cond.l_rtrangeMax'
85 #end if
86
87 #if $Query.q_filters.q_accessions_cond.q_accessions_bool
88 --q_accessions='$Query.q_filters.q_accessions_cond.q_accessions'
89 #end if
90
91 #if $Library.l_filters.l_accessions_cond.l_accessions_bool
92 --l_accessions='$Library.l_filters.l_accessions_cond.l_accessions'
93 #end if
94
95
96 #if $Query.q_filters.q_sources_cond.q_sources_bool
97 --q_sources='$Query.q_filters.q_sources_cond.q_sources'
98 --q_sourcesUser='$Query.q_filters.q_sources_cond.q_sourcesUser'
99 #end if
100
101 #if $Library.l_filters.l_sources_cond.l_sources_bool
102 --l_sources='$Library.l_filters.l_sources_cond.l_sources'
103 --l_sourcesUser='$Library.l_filters.l_sources_cond.l_sourcesUser'
104 #end if
105
106 #if $Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes_bool
107 --q_instrumentTypes='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes'
108 --q_instrumentTypesUser='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypesUser'
109 #end if
110
111 #if $Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes_bool
112 --l_instrumentTypes='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes'
113 --l_instrumentTypesUser='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypesUser'
114 #end if
115
116 #if $Query.q_filters.q_instruments_cond.q_instruments_bool
117 --q_instruments='$Query.q_filters.q_instruments_cond.q_instruments'
118 #end if
119
120 #if $Library.l_filters.l_instruments_cond.l_instruments_bool
121 --l_instruments='$Library.l_filters.l_instruments_cond.l_instruments'
122 #end if
123
124 #if $Query.q_filters.q_spectraTypes_cond.q_spectraTypes_bool
125 --q_spectraTypes='$Query.q_filters.q_spectraTypes_cond.q_spectraTypes'
126 #end if
127
128 #if $Library.l_filters.l_spectraTypes_cond.l_spectraTypes_bool
129 --l_spectraTypes='$Library.l_filters.l_spectraTypes_cond.l_spectraTypes'
130 #end if
131
132 #if $Query.q_filters.q_spectraFilter
133 --q_spectraFilter
134 #end if
135
136 #if $Library.l_filters.l_spectraFilter
137 --l_spectraFilter
138 #end if
139
140 #if $General.rttol_cond.rttol_bool
141 --rttol='$General.rttol_cond.rttol'
142 #end if
143
144 --raW='$General.raW'
145 --mzW='$General.mzW'
146
## --copyDb is only meaningful together with --updateDb, hence the nesting.
147 #if $General.updateDb_cond.updateDb
148 --updateDb
149 #if $General.updateDb_cond.copyDb
150 --copyDb
151 #end if
152 #end if
153
154 #if $General.usePrecursors
155 --usePrecursors
156 #end if
157
158 ]]></command>
159 <inputs>
<!-- Query section: expands the sm_input and filters macros with the short prefix "q";
     the command template reads the resulting values as $Query.q_dbPth_con.* and
     $Query.q_filters.*. The macros themselves are presumably defined in macros.xml. -->
160 <section name="Query" title="Query spectra input and filters" expanded="True">
161 <expand macro="sm_input" ql="Query" ql_shrt = "q" user="True" mspuritydatalib="False" msp="False"
162 help="Query SQLite database - in the standard XCMS msPurity workflow - the output
163 of msPurity.createDatabase should be used here. However any SQLite database
164 following the schema of https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/>
165 <expand macro="filters" ql="Query" ql_shrt="q"/>
166 </section>
<!-- Library section: expands the sm_input and filters macros with the short prefix "l";
     the command template reads the resulting values as $Library.l_dbPth_con.* and
     $Library.l_filters.*. The macros themselves are presumably defined in macros.xml. -->
167 <section name="Library" title="Library spectra input and filters" expanded="True">
168 <expand macro="sm_input" ql="Library" ql_shrt = "l" user="False" mspuritydatalib="True" msp="False"
169 help="Library SQLite database - in the standard XCMS msPurity workflow - a default
170 database of MassBank, HMDB, LipidBlast and GNPS is used. However any SQLite
171 database following the schema of https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/>
172 <expand macro="filters" ql="Library" ql_shrt="l"/>
173 </section>
<!-- General section: matching options that are not specific to the query or the library. -->
174 <section name="General" title="General arguments" expanded="False">
<!-- When rttol_bool is true the command template adds the rttol value as a command-line option;
     otherwise no retention-time option is passed. -->
175 <conditional name="rttol_cond">
176 <param name="rttol_bool" type="boolean" label="Filter on retention time match?"
177 help="" />
178 <when value="true">
179 <param name="rttol" type="float" value="30" min="0"
180 label="Retention time tolerance (seconds)"
181 help="Retention time tolerance in seconds to match precursors"/>
182 </when>
183 <when value="false"/>
184 </conditional>
<!-- Emitted as a bare usePrecursors flag by the command template when checked. -->
185 <param name="usePrecursors" type="boolean" checked="true" label="Filter on matching precursors?"
186 help="If True, spectra will be filtered by similarity of precursors based on
187 the library and query ppm defined tolerance" />
<!-- raW and mzW are always passed to the script (as the raW and mzW options). -->
188 <param name="raW" label="Weighting for relative abundance" type="float" value="0.5"
189 help="Relative abundance weight for spectra (default to 0.5 as determined by
190 massbank for ESI data)"/>
191 <param name="mzW" label="Weighting for mz" type="float" min="0" value="2"
192 help="mz weight for spectra (default to 2 as determined by massbank for ESI data)"/>
<!-- updateDb adds the updateDb flag; copyDb is only offered, and only passed as the copyDb flag,
     when updateDb is true. -->
193 <conditional name="updateDb_cond">
194 <param name="updateDb" type="boolean" checked="true"
195 label="Update database with results?" help="" />
196 <when value="true">
197 <param name="copyDb" type="boolean" checked="true"
198 label="Make a copy of the database?"
199 help="A copy will be made of the input SQLite target database and the
200 results will be added to this copy. When False, the input SQLite
201 database will be updated with the matching results. Use False if
202 you want to reduce storage space being used."/>
203 </when>
204 <when value="false"/>
205 </conditional>
206 </section>
207 </inputs>
208
<!-- Three outputs collected from the working directory after the R script finishes. -->
209 <outputs>
210 <data name="sqlite_results" format="sqlite" label="${tool.name} on ${on_string}: SQLite results"
211 from_work_dir="db_with_spectral_matching.sqlite" >
<!-- NOTE(review): no parameter named create_new_database is visible in this file (the General
     section exposes updateDb / copyDb instead). Unless macros.xml defines it, this filter cannot
     evaluate as intended; confirm and probably rewrite against General.updateDb_cond. -->
212 <filter>create_new_database is True</filter>
213 </data>
214 <data name="matches" format="tsv" label="${tool.name} on ${on_string}: matches"
215 from_work_dir="matched_results.tsv" >
<!-- NOTE(review): spectra_type_q is likewise not a parameter visible in this file; confirm
     whether this filter is a leftover and should be removed or re-targeted. -->
216 <filter>spectra_type_q == "scans"</filter>
217 </data>
218 <data name="xcms_matches" format="tsv" label="${tool.name} on ${on_string}: XCMS matches"
219 from_work_dir="xcms_matched_results.tsv" />
220 </outputs>
<!-- Each output is compared with the reference file whose name matches its from_work_dir
     (matches <-> *_matched_results*, xcms_matches <-> *_xcms_matched_results*); the two
     expected files were previously swapped. -->
221 <tests>
<!-- Test 1: user-supplied library database, restricted to XCMS group 14 and accession PR100037. -->
222 <test>
223 <param name="q_dbPth" value="createDatabase_output.sqlite" />
224 <param name="l_dbPth_select" value="userdb" />
225 <param name="l_dbPth" value="PR100037.sqlite" />
226 <param name="q_xcmsGroups_bool" value="true" />
227 <param name="l_accessions_bool" value="true" />
228 <param name="q_xcmsGroups" value="14" />
229 <param name="l_accessions" value="PR100037" />
230 <output name="matches" file="spectralMatching_matched_results.tsv" />
231 <output name="xcms_matches" file="spectralMatching_xcms_matched_results.tsv" />
232 <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching.sqlite" ftype="sqlite" compare="sim_size"/>
233 </test>
<!-- Test 2: same as test 1 with the library instrument-type filter switched on. -->
234 <test>
235 <param name="l_instrumentTypes_bool" value="true" />
236 <param name="q_dbPth" value="createDatabase_output.sqlite" />
237 <param name="l_dbPth_select" value="userdb" />
238 <param name="l_dbPth" value="PR100037.sqlite" />
239 <param name="q_xcmsGroups_bool" value="true" />
240 <param name="l_accessions_bool" value="true" />
241 <param name="q_xcmsGroups" value="14" />
242 <param name="l_accessions" value="PR100037" />
243 <output name="matches" file="spectralMatching_matched_results_instrumentTypes.tsv" />
244 <output name="xcms_matches" file="spectralMatching_xcms_matched_results_instrumentTypes.tsv" />
245 <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching_instrumentTypes.sqlite" ftype="sqlite" compare="sim_size"/>
246 </test>
247 </tests>
248
249 <help><![CDATA[
250
251 =============================================================
252 Spectral matching
253 =============================================================
254 -----------
255 General
256 -----------
257
258
259 Perform spectral matching to spectral libraries for an LC-MS/MS dataset.
260
261 The spectral matching is performed from a **Query** SQLite spectral-database against a **Library** SQLite spectral-database.
262
263 The SQLite schema of the spectral database here: spectral_database_schema_
264
265
266 The query spectral-database in most cases should contain be the "unknown" spectra database generated the msPurity
267 function createDatabase as part of a msPurity-XCMS data processing workflow.
268
269 The library spectral-database in most cases should contain the "known" spectra from either public or user generated resources.
270 The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and GNPS.
271 A larger_database_ can be download and used from the msp2db github repository.
272
273 To create a user generated library SQLite database the following tool can be used to generate a SQLite database
274 from a collection of MSP files: msp2db_.
275
276 It should be noted though, that as long as the schema of the spectral-database is as described here, then any database can be used
277 for either the library or query - even allowing for the same database to be used.
278
279 The spectral matching functionality has four main components, spectral filtering, spectral alignment, spectral matching,
280 and summarising the results.
281
282 Spectral filtering is simply filtering both the library and query spectra to be search against (e.g. choosing
283 the library source, instrument, retention time, precursor PPM tolerance etc).
284
285 The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar
286 to modified pMatch algorithm described in Zhou et al 2015.
287
288 The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector - created for both
289 the query and library spectra (wq and wl). See below:
290
291 .. math::
292
293 w=intensity^x \cdot mz^y
294
295
296 Where x and y represent weight factors and can be adjusted with the parameters raW and mzW.
297 Defaults to x=0.5 and y=2 as per MassBank for ESI based mass spectrometry data.
298
299 The aligned weighted vectors are then matched using dot product cosine, reverse dot product cosine and the composite dot product.
300 See below for dot product cosine equation.
301
302 .. math::
303
304 dpc = \frac{ w_q \cdot w_l } { \sqrt{\sum{w_{q}^{2}}} \cdot \sqrt{\sum{w_{l}^{2}}} }
305
306
307 Full details of the matching approaches are described in the msPurity_spectral_matching_vignette_
308
309 --------------------------------------------
310 Example LC-MS/MS processing workflow
311 --------------------------------------------
312
313 * Purity assessments
314 + (mzML files) -> purityA -> (pa)
315 * XCMS processing
316 + (mzML files) -> xcms.xcmsSet -> xcms.merge -> xcms.group -> xcms.retcor -> xcms.group -> (xset)
317 * Fragmentation processing
318 + (xset, pa) -> frag4feature -> filterFragSpectra -> averageAllFragSpectra -> createDatabase -> **spectralMatching** -> (sqlite spectral database)
319
320 -----------
321 Output
322 -----------
323
324 **Database**
325
326 The updated query database (this will have been updated with the annotation results if the updateDb argument is used)
327
328
329 **xcmsMatchedResults**
330
331 If the query spectra had XCMS-based chromatographic peak tables (e.g. c_peak_groups, c_peaks) in the SQLite database, it will
332 be possible to summarise the matches for each XCMS grouped feature. The dataframe contains the following columns:
333
334 * lpid - id in database of library spectra
335 * qpid - id in database of query spectra
336 * dpc - dot product cosine of the match
337 * rdpc - reverse dot product cosine of the match
338 * cdpc - composite dot product cosine of the match
339 * mcount - number of matching peaks
340 * allcount - total number of peaks across both query and library spectra
341 * mpercent - percentage of matching peaks across both query and library spectra
342 * library_rt - retention time of library spectra
343 * query_rt - retention time of query spectra
344 * rtdiff - difference between library and query retention time
345 * library_precursor_mz - library precursor mz
346 * query_precursor_mz - query precursor mz
347 * library_precursor_ion_purity - library precursor ion purity
348 * query_precursor_ion_purity - query precursor ion purity
349 * library_accession - library accession value (unique string or number given to either MoNA or MassBank data entries)
350 * library_precursor_type - library precursor type (i.e. adduct)
351 * library_entry_name - Name given to the library spectra
352 * inchikey - inchikey of the matched library spectra
353 * library_source_name - source of the spectra (e.g. massbank, gnps)
354 * library_compound_name - name of compound spectra was obtained from
355
356 **matchedResults**
357
358 All matched results from the query spectra to the library spectra. Contains the same as above
359 without the XCMS details. This table is useful to observe spectral matching results
360 for all MS/MS spectra irrespective of if they are linked to XCMS MS1 features.
361
362
363 .. _spectral_database_schema: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html
364 .. _larger_database: https://github.com/computational-metabolomics/msp2db/releases
365 .. _msp2db: https://github.com/computational-metabolomics/msp2db/releases
366 .. _msPurity_spectral_matching_vignette: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-lcmsms-data-processing-and-spectral-matching-vignette.html
367
368 ]]></help>
369
370 <expand macro="citations"> </expand>
371 </tool>