changeset 1:6b5867cab698 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 98223db312c30b0e121a1422a9534a3db3fbf0c0
author recetox
date Thu, 14 Dec 2023 13:43:50 +0000
parents ea891750acfc
children 0a64e2bcce96
files matchms_filtering_wrapper.py test-data/filtering/derive_precursor_mz.msp test-data/filtering/derive_precursor_mz_out.msp
diffstat 3 files changed, 108 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/matchms_filtering_wrapper.py	Mon Dec 04 19:17:25 2023 +0000
+++ b/matchms_filtering_wrapper.py	Thu Dec 14 13:43:50 2023 +0000
@@ -6,6 +6,7 @@
     add_retention_index, add_retention_time, clean_compound_name
 from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \
     select_by_relative_intensity
+from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass
 from matchms.importing import load_from_mgf, load_from_msp
 
 
@@ -39,6 +40,9 @@
                         help="Remove spectra that does not contain SMILES.")
     parser.add_argument("-require_inchi", action='store_true',
                         help="Remove spectra that does not contain INCHI.")
+    parser.add_argument("-derive_precursor_mz_from_parent_mass", action='store_true',
+                        help="Derives the precursor_mz from the parent mass and adduct or charge.")
+    parser.add_argument("--estimate_from_adduct", type=str, help="estimate from adduct.")
     parser.add_argument("-reduce_to_top_n_peaks", action='store_true',
                         help="reduce to top n peaks filter.")
     parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.")
@@ -51,6 +55,7 @@
             or args.mz_range
             or args.require_smiles
             or args.require_inchi
+            or args.derive_precursor_mz_from_parent_mass
             or args.reduce_to_top_n_peaks):
         raise ValueError('No filter selected.')
 
@@ -84,6 +89,11 @@
         if args.reduce_to_top_n_peaks:
             spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max)
 
+        if args.derive_precursor_mz_from_parent_mass:
+            spectrum.set("parent_mass", float(spectrum.get('parent_mass')))
+            precursor_mz = derive_precursor_mz_from_parent_mass(spectrum, args.estimate_from_adduct)
+            spectrum.set("precursor_mz", precursor_mz)
+
         if args.require_smiles and spectrum is not None:
             spectrum = require_key(spectrum, "smiles")
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/derive_precursor_mz.msp	Thu Dec 14 13:43:50 2023 +0000
@@ -0,0 +1,48 @@
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C20H12
+INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
+SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Perylene
+RETENTION_TIME: None
+RETENTION_INDEX: 2886.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 251.08595400000002
+NUM PEAKS: 3
+250.07765   0.3282529462971431
+252.09323   1.0
+253.09656   0.20573802940517583
+
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C14H10
+INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
+SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Phenanthrene
+RETENTION_TIME: None
+RETENTION_INDEX: 1832.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 177.070224
+NUM PEAKS: 5
+152.0619    0.1657993569424221
+176.062     0.24558560966311757
+177.06982   0.12764433529926775
+178.0775    1.0
+179.08078   0.16394988149600653
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/derive_precursor_mz_out.msp	Thu Dec 14 13:43:50 2023 +0000
@@ -0,0 +1,50 @@
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C20H12
+INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
+SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Perylene
+RETENTION_TIME: None
+RETENTION_INDEX: 2886.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 251.08595400000002
+PRECURSOR_MZ: 251.08540542009078
+NUM PEAKS: 3
+250.07765   0.3282529462971431
+252.09323   1.0
+253.09656   0.20573802940517583
+
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C14H10
+INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
+SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Phenanthrene
+RETENTION_TIME: None
+RETENTION_INDEX: 1832.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 177.070224
+PRECURSOR_MZ: 177.06967542009076
+NUM PEAKS: 5
+152.0619    0.1657993569424221
+176.062     0.24558560966311757
+177.06982   0.12764433529926775
+178.0775    1.0
+179.08078   0.16394988149600653
+