diff spectralMatching.xml @ 0:5ff9d40c7a42 draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc-dirty"
author computational-metabolomics
date Wed, 27 Nov 2019 12:31:31 -0500
parents
children aee10d29e82c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spectralMatching.xml	Wed Nov 27 12:31:31 2019 -0500
@@ -0,0 +1,371 @@
+<tool id="mspurity_spectralmatching" name="msPurity.spectralMatching" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
+    <description>
+        Perform spectral matching to MS/MS spectral libraries
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '$__tool_directory__/spectralMatching.R'
+            --outDir=.
+            --cores=\${GALAXY_SLOTS:-4}
+
+            #if $Query.q_dbPth_con.q_dbPth_select == 'msPurityData'
+              --q_defaultDb
+            #else if $Query.q_dbPth_con.q_dbPth_select == 'sqlite'
+              --q_dbPth=$Query.q_dbPth_con.q_dbPth
+            #end if
+
+            #if $Library.l_dbPth_con.l_dbPth_select == 'msPurityData'
+              --l_defaultDb
+            #else if $Library.l_dbPth_con.l_dbPth_select == 'userdb_sqlite'
+              --l_dbPth=$Library.l_dbPth_con.l_dbPth
+            #end if
+
+            --l_dbType=$Library.l_dbPth_con.l_dbPth_select
+            --q_dbType=$Query.q_dbPth_con.q_dbPth_select
+
+
+            --q_ppmPrec=$Query.q_filters.q_ppmPrec
+            --l_ppmPrec=$Library.l_filters.l_ppmPrec
+
+            --q_ppmProd=$Query.q_filters.q_ppmProd
+            --l_ppmProd=$Library.l_filters.l_ppmProd
+
+
+            #if $Query.q_filters.q_raThres_cond.q_raThres_bool
+              --q_raThres=$Query.q_filters.q_raThres_cond.q_raThres
+            #end if
+
+            #if $Library.l_filters.l_raThres_cond.l_raThres_bool
+              --l_raThres=$Library.l_filters.l_raThres_cond.l_raThres
+            #end if
+
+            #if $Query.q_filters.q_polarity_cond.q_polarity_bool
+              --q_polarity=$Query.q_filters.q_polarity_cond.q_polarity
+            #end if
+
+            #if $Library.l_filters.l_polarity_cond.l_polarity_bool
+              --l_polarity=$Library.l_filters.l_polarity_cond.l_polarity
+            #end if
+
+            #if $Query.q_filters.q_purity_cond.q_purity_bool
+              --q_purity=$Query.q_filters.q_purity_cond.q_purity
+            #end if
+
+            #if $Library.l_filters.l_purity_cond.l_purity_bool
+              --l_purity=$Library.l_filters.l_purity_cond.l_purity
+            #end if
+
+            #if $Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups_bool
+              --q_xcmsGroups=$Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups
+            #end if
+
+            #if $Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups_bool
+              --l_xcmsGroups=$Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups
+            #end if
+
+            #if $Query.q_filters.q_pids_cond.q_pids_bool
+              --q_pids=$Query.q_filters.q_pids_cond.q_pids
+            #end if
+
+            #if $Library.l_filters.l_pids_cond.l_pids_bool
+              --l_pids=$Library.l_filters.l_pids_cond.l_pids
+            #end if
+
+            #if $Query.q_filters.q_rtrange_cond.q_rtrange_bool
+              --q_rtrangeMin=$Query.q_filters.q_rtrange_cond.q_rtrangeMin
+              --q_rtrangeMax=$Query.q_filters.q_rtrange_cond.q_rtrangeMax
+            #end if
+
+            #if $Library.l_filters.l_rtrange_cond.l_rtrange_bool
+              --l_rtrangeMin=$Library.l_filters.l_rtrange_cond.l_rtrangeMin
+              --l_rtrangeMax=$Library.l_filters.l_rtrange_cond.l_rtrangeMax
+            #end if
+
+            #if $Query.q_filters.q_accessions_cond.q_accessions_bool
+              --q_accessions=$Query.q_filters.q_accessions_cond.q_accessions
+            #end if
+
+            #if $Library.l_filters.l_accessions_cond.l_accessions_bool
+              --l_accessions=$Library.l_filters.l_accessions_cond.l_accessions
+            #end if
+
+
+            #if $Query.q_filters.q_sources_cond.q_sources_bool
+              --q_sources=$Query.q_filters.q_sources_cond.q_sources
+              --q_sourcesUser='$Query.q_filters.q_sources_cond.q_sourcesUser'
+            #end if
+
+            #if $Library.l_filters.l_sources_cond.l_sources_bool
+              --l_sources=$Library.l_filters.l_sources_cond.l_sources
+              --l_sourcesUser='$Library.l_filters.l_sources_cond.l_sourcesUser'
+            #end if
+
+            #if $Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes_bool
+              --q_instrumentTypes='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes'
+              --q_instrumentTypesUser='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypesUser'
+            #end if
+
+            #if $Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes_bool
+              --l_instrumentTypes='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes'
+              --l_instrumentTypesUser='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypesUser'
+            #end if
+
+            #if $Query.q_filters.q_instruments_cond.q_instruments_bool
+              --q_instruments=$Query.q_filters.q_instruments_cond.q_instruments
+            #end if
+
+            #if $Library.l_filters.l_instruments_cond.l_instruments_bool
+              --l_instruments='$Library.l_filters.l_instruments_cond.l_instruments'
+            #end if
+
+            #if $Query.q_filters.q_spectraTypes_cond.q_spectraTypes_bool
+              --q_spectraTypes=$Query.q_filters.q_spectraTypes_cond.q_spectraTypes
+            #end if
+
+            #if $Library.l_filters.l_spectraTypes_cond.l_spectraTypes_bool
+              --l_spectraTypes=$Library.l_filters.l_spectraTypes_cond.l_spectraTypes
+            #end if
+
+            #if $Query.q_filters.q_spectraFilter
+              --q_spectraFilter
+            #end if
+
+            #if $Library.l_filters.l_spectraFilter
+              --l_spectraFilter
+            #end if
+
+            #if $General.rttol_cond.rttol_bool
+              --rttol=$General.rttol_cond.rttol
+            #end if
+
+            --raW=$General.raW
+            --mzW=$General.mzW
+
+            #if $General.updateDb_cond.updateDb
+              --updateDb
+              #if $General.updateDb_cond.copyDb
+                 --copyDb
+              #end if
+            #end if
+
+            #if $General.usePrecursors
+                 --usePrecursors
+            #end if
+
+    ]]></command>
+    <inputs>
+        <section name="Query" title="Query spectra input and filters" expanded="True">
+            <expand macro="sm_input" ql="Query" ql_shrt = "q" user="True" mspuritydatalib="False" msp="False"
+                help="Query SQLite database - in the standard XCMS msPurity workflow - the output
+                    of msPurity.createDatabase should be used here. However any SQLite database
+                    following the schema of as https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/>
+            <expand macro="filters" ql="Query" ql_shrt="q"/>
+        </section>
+        <section name="Library" title="Library spectra input and filters" expanded="True">
+            <expand macro="sm_input" ql="Library" ql_shrt = "l" user="False" mspuritydatalib="True" msp="False"
+                help="Library SQLite database - in the standard XCMS msPurity workflow - a default
+                    database of MassBank, HMDB, LipidBlast and GNPS is used. However any SQLite
+                    database following the schema of https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/>
+            <expand macro="filters" ql="Library"  ql_shrt="l"/>
+        </section>
+        <section name="General" title="General arguments" expanded="False">
+            <conditional name="rttol_cond">
+                <param name="rttol_bool" type="boolean" label="Filter on retention time match?"
+                    help="" />
+                    <when value="true">
+                        <param name="rttol" type="float" value="30" min="0"
+                            label="Retention time tolerance (seconds)"
+                            help="Retention time tolerance in seconds to match precursors"/>
+                    </when>
+                    <when value="false"/>
+            </conditional>
+            <param name="usePrecursors" type="boolean" checked="true" label="Filter on matching precursors?"
+                help="If True, spectra will be filtered by similarity of precursors based on
+                    the library and query ppm defined tolerance" />
+            <param name="raW" label="Weighting for relative abundance" type="float" value="0.5"
+                help="Relative abundance weight for spectra (default to 0.5 as determined by
+                    massbank for ESI data)"/>
+            <param name="mzW" label="Weighting for mz" type="float" min="0" value="2"
+                help="mz weight for spectra (default to 2 as determined by massbank for ESI data)"/>
+            <conditional name="updateDb_cond">
+                <param name="updateDb" type="boolean" checked="true"
+                    label="Update database with results?" help="" />
+                    <when value="true">
+                        <param name="copyDb" type="boolean" checked="true"
+                            label="Make a copy of the database?"
+                            help="A copy will be made of the input SQLite target database and the
+                                results will be added to this copy.  When False, the input SQLite
+                                database will be updated  with the matching results. Use False if
+                                you want to reduce storage space being used."/>
+                    </when>
+                    <when value="false"/>
+            </conditional>
+        </section>
+    </inputs>
+
+    <outputs>
+        <data name="sqlite_results" format="sqlite" label="${tool.name} on ${on_string}: SQLite results"
+            from_work_dir="db_with_spectral_matching.sqlite" >
+            <filter>create_new_database is True</filter>
+        </data>
+        <data name="matches" format="tsv" label="${tool.name} on ${on_string}: matches"
+            from_work_dir="matched_results.tsv" >
+            <filter>spectra_type_q == "scans"</filter>
+        </data>
+        <data name="xcms_matches" format="tsv" label="${tool.name} on ${on_string}: XCMS matches"
+            from_work_dir="xcms_matched_results.tsv" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="q_dbPth" value="createDatabase_output.sqlite" />
+	        <param name="l_dbPth_select" value="userdb" />
+            <param name="l_dbPth" value="PR100037.sqlite" />
+            <param name="q_xcmsGroups_bool" value="true" />
+            <param name="l_accessions_bool" value="true" />
+            <param name="q_xcmsGroups" value="14" />
+            <param name="l_accessions" value="PR100037" />
+            <output name="xcms_matches" file="spectralMatching_matched_results.tsv" />
+            <output name="matches" file="spectralMatching_xcms_matched_results.tsv" />
+            <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching.sqlite" ftype="sqlite" compare="sim_size"/>
+        </test>
+        <test>
+            <param name="l_instrumentTypes_bool" value="true" />
+            <param name="q_dbPth" value="createDatabase_output.sqlite" />
+	    	<param name="l_dbPth_select" value="userdb" />
+            <param name="l_dbPth" value="PR100037.sqlite" />
+            <param name="q_xcmsGroups_bool" value="true" />
+            <param name="l_accessions_bool" value="true" />
+            <param name="q_xcmsGroups" value="14" />
+            <param name="l_accessions" value="PR100037" />
+            <output name="xcms_matches" file="spectralMatching_matched_results_instrumentTypes.tsv" />
+            <output name="matches" file="spectralMatching_xcms_matched_results_instrumentTypes.tsv" />
+            <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching_instrumentTypes.sqlite" ftype="sqlite" compare="sim_size"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+=============================================================
+Spectral matching
+=============================================================
+-----------
+General
+-----------
+
+
+Perform spectral matching to spectral libraries for an LC-MS/MS dataset.
+
+The spectral matching is performed from a **Query** SQLite spectral-database against a **Library** SQLite spectral-database.
+
+The SQLite schema of the spectral database here: spectral_database_schema_
+
+
+The query spectral-database in most cases should contain be the "unknown" spectra database generated the msPurity
+function createDatabase as part of a msPurity-XCMS data processing workflow.
+
+The library spectral-database in most cases should contain the "known" spectra from either public or user generated resources.
+The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and GNPS.
+A larger_database_ can be download and used from the msp2db github repository.
+
+To create a user generated library SQLite database the following tool can be used to generate a SQLite database
+from a collection of MSP files: msp2db_.
+
+It should be noted though, that as long as the schema of the spectral-database is as described here, then any database can be used
+for either the library or query -  even allowing for the same database to be used.
+
+The spectral matching functionality has four main components, spectral filtering, spectral alignment, spectral matching,
+and summarising the results.
+
+Spectral filtering is simply filtering both the library and query spectra to be search against (e.g. choosing
+the library source, instrument, retention time, precursor PPM tolerance etc).
+
+The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar
+to modified pMatch algorithm described in Zhou et al 2015.
+
+The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector - created for both
+the query and library spectra (wq and wl). See below:
+
+.. math::
+
+    w=intensity^x \cdot mz^y
+
+
+Where x and y represent weight factors and can be adjusted with the parameters raW and mzW.
+Defaults to x=0.5 and y=2 as per MassBank for ESI based mass spectrometry data.
+
+The aligned weighted vectors are then matched using dot product cosine, reverse dot product cosine and the composite dot product.
+See below for dot product cosine equation.
+
+.. math::
+
+    dpc =  \frac{ w_q \cdot w_l } { \sqrt{Σ{w_{q}{}^2} } \cdot \sqrt{Σ{w_{l}{}^2}}}
+
+
+Full details of the matching approaches are described in the msPurity_spectral_matching_vignette_
+
+--------------------------------------------
+Example LC-MS/MS processing workflow
+--------------------------------------------
+
+* Purity assessments
+   +  (mzML files) -> purityA -> (pa)
+* XCMS processing
+   +  (mzML files) -> xcms.xcmsSet -> xcms.merge -> xcms.group -> xcms.retcor -> xcms.group -> (xset)
+* Fragmentation processing
+   + (xset, pa) -> frag4feature -> filterFragSpectra -> averageAllFragSpectra -> createDatabase -> **spectralMatching** -> (sqlite spectral database)
+
+-----------
+Output
+-----------
+
+**Database**
+
+The updated query database (this will have been updated with the annotation results if updateDb argument used)
+
+
+**xcmsMatchedResults**
+
+If the qeury spectra had XCMS based chromotographic peaks tables (e.g c_peak_groups, c_peaks) in the sqlite database - it will
+be possible to summarise the matches for each XCMS grouped feature. The dataframe contains the following columns
+
+* lpid - id in database of library spectra
+* qpid - id in database of query spectra
+* dpc - dot product cosine of the match
+* rdpc - reverse dot product cosine of the match
+* cdpc - composite dot product cosine of the match
+* mcount - number of matching peaks
+* allcount - total number of peaks across both query and library spectra
+* mpercent - percentage of matching peaks across both query and library spectra
+* library_rt - retention time of library spectra
+* query_rt - retention time of query spectra
+* rtdiff - difference between library and query retention time
+* library_precursor_mz - library precursor mz
+* query_precursor_mz - query precursor mz
+* library_precursor_ion_purity - library precursor ion purity
+* query_precursor_ion_purity - query precursor ion purity
+* library_accession -  library accession value (unique string or number given to eith MoNA or Massbank data entires)
+* library_precursor_type - library precursor type (i.e. adduct)
+* library_entry_name - Name given to the library spectra
+* inchikey - inchikey of the matched library spectra
+* library_source_name - source of the spectra (e.g. massbank, gnps)
+* library_compound_name - name of compound spectra was obtained from
+
+**matchedResults**
+
+All matched results from the query spectra to the library spectra. Contains the same as above
+without the XCMS details. This table is useful to observe spectral matching results
+for all MS/MS spectra irrespective of if they are linked to XCMS MS1 features.
+
+
+.. _spectral_database_schema: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-datatabase-schema.html
+.. _larger_database: https://github.com/computational-metabolomics/msp2db/releases
+.. _msp2db: https://github.com/computational-metabolomics/msp2db/releases
+.. _msPurity_spectral_matching_vignette: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-lcmsms-data-processing-and-spectral-matching-vignette.html
+
+    ]]></help>
+
+	<expand macro="citations">     </expand>
+</tool>