Mercurial > repos > recetox > matchms_metadata_merge
changeset 6:c1c1cb295b74 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 98223db312c30b0e121a1422a9534a3db3fbf0c0
author | recetox |
---|---|
date | Thu, 14 Dec 2023 13:44:21 +0000 |
parents | 3a37748f6c95 |
children | ec25de0f0c58 |
files | matchms_filtering_wrapper.py test-data/filtering/derive_precursor_mz.msp test-data/filtering/derive_precursor_mz_out.msp |
diffstat | 3 files changed, 108 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/matchms_filtering_wrapper.py Mon Dec 04 19:16:50 2023 +0000
+++ b/matchms_filtering_wrapper.py Thu Dec 14 13:44:21 2023 +0000
@@ -6,6 +6,7 @@
     add_retention_index, add_retention_time, clean_compound_name
 from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \
     select_by_relative_intensity
+from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass
 from matchms.importing import load_from_mgf, load_from_msp
 
 
@@ -39,6 +40,9 @@
                         help="Remove spectra that does not contain SMILES.")
     parser.add_argument("-require_inchi", action='store_true',
                         help="Remove spectra that does not contain INCHI.")
+    parser.add_argument("-derive_precursor_mz_from_parent_mass", action='store_true',
+                        help="Derives the precursor_mz from the parent mass and adduct or charge.")
+    parser.add_argument("--estimate_from_adduct", type=str, help="estimate from adduct.")
     parser.add_argument("-reduce_to_top_n_peaks", action='store_true',
                         help="reduce to top n peaks filter.")
     parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.")
@@ -51,6 +55,7 @@
             or args.mz_range
             or args.require_smiles
             or args.require_inchi
+            or args.derive_precursor_mz_from_parent_mass
             or args.reduce_to_top_n_peaks):
         raise ValueError('No filter selected.')
 
@@ -84,6 +89,11 @@
         if args.reduce_to_top_n_peaks:
             spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max)
 
+        if args.derive_precursor_mz_from_parent_mass:
+            spectrum.set("parent_mass", float(spectrum.get('parent_mass')))
+            precursor_mz = derive_precursor_mz_from_parent_mass(spectrum, args.estimate_from_adduct)
+            spectrum.set("precursor_mz", precursor_mz)
+
         if args.require_smiles and spectrum is not None:
             spectrum = require_key(spectrum, "smiles")
 
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/derive_precursor_mz.msp Thu Dec 14 13:44:21 2023 +0000
@@ -0,0 +1,48 @@
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C20H12
+INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
+SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Perylene
+RETENTION_TIME: None
+RETENTION_INDEX: 2886.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 251.08595400000002
+NUM PEAKS: 3
+250.07765 0.3282529462971431
+252.09323 1.0
+253.09656 0.20573802940517583
+
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C14H10
+INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
+SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Phenanthrene
+RETENTION_TIME: None
+RETENTION_INDEX: 1832.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 177.070224
+NUM PEAKS: 5
+152.0619 0.1657993569424221
+176.062 0.24558560966311757
+177.06982 0.12764433529926775
+178.0775 1.0
+179.08078 0.16394988149600653
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/derive_precursor_mz_out.msp Thu Dec 14 13:44:21 2023 +0000
@@ -0,0 +1,50 @@
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C20H12
+INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
+SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Perylene
+RETENTION_TIME: None
+RETENTION_INDEX: 2886.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 251.08595400000002
+PRECURSOR_MZ: 251.08540542009078
+NUM PEAKS: 3
+250.07765 0.3282529462971431
+252.09323 1.0
+253.09656 0.20573802940517583
+
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C14H10
+INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
+SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Phenanthrene
+RETENTION_TIME: None
+RETENTION_INDEX: 1832.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 177.070224
+PRECURSOR_MZ: 177.06967542009076
+NUM PEAKS: 5
+152.0619 0.1657993569424221
+176.062 0.24558560966311757
+177.06982 0.12764433529926775
+178.0775 1.0
+179.08078 0.16394988149600653
+