Mercurial > repos > recetox > matchms_metadata_merge
changeset 7:ec25de0f0c58 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b797ce0fea5cb3d9a0cf0ec31a21f9b73f112b4e
author | recetox |
---|---|
date | Mon, 15 Jan 2024 12:27:30 +0000 |
parents | c1c1cb295b74 |
children | 99ad7552fb10 |
files | matchms_filtering_wrapper.py test-data/filtering/require_filter.msp test-data/filtering/require_inchi_out.msp test-data/filtering/require_out.msp test-data/filtering/require_smiles_out.msp |
diffstat | 5 files changed, 35 insertions(+), 106 deletions(-) [+] |
line wrap: on
line diff
--- a/matchms_filtering_wrapper.py Thu Dec 14 13:44:21 2023 +0000 +++ b/matchms_filtering_wrapper.py Mon Jan 15 12:27:30 2024 +0000 @@ -7,11 +7,13 @@ from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \ select_by_relative_intensity from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass +from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import is_valid_inchi, is_valid_smiles from matchms.importing import load_from_mgf, load_from_msp -def require_key(spectrum, key): - if spectrum.get(key): +def require_key(spectrum, key, function): + value = spectrum.get(key) + if function(value): return spectrum return None @@ -95,10 +97,10 @@ spectrum.set("precursor_mz", precursor_mz) if args.require_smiles and spectrum is not None: - spectrum = require_key(spectrum, "smiles") + spectrum = require_key(spectrum, "smiles", is_valid_smiles) if args.require_inchi and spectrum is not None: - spectrum = require_key(spectrum, "inchi") + spectrum = require_key(spectrum, "inchi", is_valid_inchi) if spectrum is not None: filtered_spectra.append(spectrum)
--- a/test-data/filtering/require_filter.msp Thu Dec 14 13:44:21 2023 +0000 +++ b/test-data/filtering/require_filter.msp Mon Jan 15 12:27:30 2024 +0000 @@ -6,7 +6,7 @@ PARENT_MASS: 347.930801 PUBCHEMID: 10970124 NOMINAL_MASS: 348 -SMILES: Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe] +SMILES: nan NUM PEAKS: 3 292.0 999.0 314.0 118.89 @@ -21,7 +21,7 @@ RETENTION_INDEX: 1588.0 PUBCHEMID: 130762197 NOMINAL_MASS: 186 -INCHI: InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1 +INCHI: nan NUM PEAKS: 20 51.0 89.92 63.0 89.92
--- a/test-data/filtering/require_inchi_out.msp Thu Dec 14 13:44:21 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -FORMULA: C13H14O -CASNO: 2000130-22-2 -ID: 7198 -COMMENT: SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588| -COMPOUND_NAME: ((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol -PARENT_MASS: 186.1044655 -RETENTION_INDEX: 1588.0 -PUBCHEMID: 130762197 -NOMINAL_MASS: 186 -INCHI: InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1 -NUM PEAKS: 20 -51.0 89.92 -63.0 89.92 -77.0 179.84 -88.0 39.96 -89.0 59.95 -91.0 49.95 -102.0 149.86 -113.0 49.95 -115.0 229.79 -127.0 139.87 -128.0 999.0 -129.0 199.82 -144.0 99.91 -155.0 119.89 -156.0 14.89 -157.0 1.1 -158.0 0.1 -186.0 39.96 -187.0 5.89 -188.0 0.5 - -FORMULA: C34H54O4 -CASNO: 2000774-54-3 -ID: 36905 -COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| -COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate -PARENT_MASS: 526.40221 -RETENTION_INDEX: 3353.0 -PUBCHEMID: 236415 -NOMINAL_MASS: 526 -INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 -SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 -NUM PEAKS: 14 -189.0 419.62 -203.0 249.77 -216.0 149.86 -262.0 79.93 -276.0 49.95 -393.0 149.86 -423.0 219.8 -453.0 179.84 -466.0 999.0 -526.0 179.84 -527.0 68.94 -528.0 14.29 -529.0 2.1 -530.0 0.2 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtering/require_out.msp Mon Jan 15 12:27:30 2024 +0000 @@ -0,0 +1,27 @@ +FORMULA: C34H54O4 +CASNO: 2000774-54-3 +ID: 36905 +COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| +COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate +PARENT_MASS: 526.40221 +RETENTION_INDEX: 3353.0 +PUBCHEMID: 236415 +NOMINAL_MASS: 526 +INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +NUM PEAKS: 14 +189.0 419.62 +203.0 249.77 +216.0 149.86 +262.0 79.93 +276.0 49.95 +393.0 149.86 +423.0 219.8 +453.0 179.84 +466.0 999.0 +526.0 179.84 +527.0 68.94 +528.0 14.29 +529.0 2.1 +530.0 0.2 +
--- a/test-data/filtering/require_smiles_out.msp Thu Dec 14 13:44:21 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -FORMULA: C13H9ClFeO4Si -CASNO: 2000570-99-8 -ID: 2011 -COMMENT: SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not real! -COMPOUND_NAME: ((.eta.5-Cyclopentadienylironbiscarbonyl)(1,2-phenylenedioxysilyl)chloride complex -PARENT_MASS: 347.930801 -PUBCHEMID: 10970124 -NOMINAL_MASS: 348 -SMILES: Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe] -NUM PEAKS: 3 -292.0 999.0 -314.0 118.89 -348.0 734.24 - -FORMULA: C34H54O4 -CASNO: 2000774-54-3 -ID: 36905 -COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| -COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate -PARENT_MASS: 526.40221 -RETENTION_INDEX: 3353.0 -PUBCHEMID: 236415 -NOMINAL_MASS: 526 -INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 -SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 -NUM PEAKS: 14 -189.0 419.62 -203.0 249.77 -216.0 149.86 -262.0 79.93 -276.0 49.95 -393.0 149.86 -423.0 219.8 -453.0 179.84 -466.0 999.0 -526.0 179.84 -527.0 68.94 -528.0 14.29 -529.0 2.1 -530.0 0.2 -