changeset 2:0a64e2bcce96 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b797ce0fea5cb3d9a0cf0ec31a21f9b73f112b4e
author recetox
date Mon, 15 Jan 2024 12:26:55 +0000
parents 6b5867cab698
children 358a151ab81e
files matchms_filtering_wrapper.py test-data/filtering/require_filter.msp test-data/filtering/require_inchi_out.msp test-data/filtering/require_out.msp test-data/filtering/require_smiles_out.msp
diffstat 5 files changed, 35 insertions(+), 106 deletions(-) [+]
line wrap: on
line diff
--- a/matchms_filtering_wrapper.py	Thu Dec 14 13:43:50 2023 +0000
+++ b/matchms_filtering_wrapper.py	Mon Jan 15 12:26:55 2024 +0000
@@ -7,11 +7,13 @@
 from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \
     select_by_relative_intensity
 from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass
+from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import is_valid_inchi, is_valid_smiles
 from matchms.importing import load_from_mgf, load_from_msp
 
 
-def require_key(spectrum, key):
-    if spectrum.get(key):
+def require_key(spectrum, key, function):
+    value = spectrum.get(key)
+    if function(value):
         return spectrum
 
     return None
@@ -95,10 +97,10 @@
             spectrum.set("precursor_mz", precursor_mz)
 
         if args.require_smiles and spectrum is not None:
-            spectrum = require_key(spectrum, "smiles")
+            spectrum = require_key(spectrum, "smiles", is_valid_smiles)
 
         if args.require_inchi and spectrum is not None:
-            spectrum = require_key(spectrum, "inchi")
+            spectrum = require_key(spectrum, "inchi", is_valid_inchi)
 
         if spectrum is not None:
             filtered_spectra.append(spectrum)
--- a/test-data/filtering/require_filter.msp	Thu Dec 14 13:43:50 2023 +0000
+++ b/test-data/filtering/require_filter.msp	Mon Jan 15 12:26:55 2024 +0000
@@ -6,7 +6,7 @@
 PARENT_MASS: 347.930801
 PUBCHEMID: 10970124
 NOMINAL_MASS: 348
-SMILES: Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe]
+SMILES: nan
 NUM PEAKS: 3
 292.0       999.0
 314.0       118.89
@@ -21,7 +21,7 @@
 RETENTION_INDEX: 1588.0
 PUBCHEMID: 130762197
 NOMINAL_MASS: 186
-INCHI: InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1
+INCHI: nan
 NUM PEAKS: 20
 51.0        89.92
 63.0        89.92
--- a/test-data/filtering/require_inchi_out.msp	Thu Dec 14 13:43:50 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-FORMULA: C13H14O
-CASNO: 2000130-22-2
-ID: 7198
-COMMENT: SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588|
-COMPOUND_NAME: ((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol
-PARENT_MASS: 186.1044655
-RETENTION_INDEX: 1588.0
-PUBCHEMID: 130762197
-NOMINAL_MASS: 186
-INCHI: InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1
-NUM PEAKS: 20
-51.0        89.92
-63.0        89.92
-77.0        179.84
-88.0        39.96
-89.0        59.95
-91.0        49.95
-102.0       149.86
-113.0       49.95
-115.0       229.79
-127.0       139.87
-128.0       999.0
-129.0       199.82
-144.0       99.91
-155.0       119.89
-156.0       14.89
-157.0       1.1
-158.0       0.1
-186.0       39.96
-187.0       5.89
-188.0       0.5
-
-FORMULA: C34H54O4
-CASNO: 2000774-54-3
-ID: 36905
-COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353|
-COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate
-PARENT_MASS: 526.40221
-RETENTION_INDEX: 3353.0
-PUBCHEMID: 236415
-NOMINAL_MASS: 526
-INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1
-SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12
-NUM PEAKS: 14
-189.0       419.62
-203.0       249.77
-216.0       149.86
-262.0       79.93
-276.0       49.95
-393.0       149.86
-423.0       219.8
-453.0       179.84
-466.0       999.0
-526.0       179.84
-527.0       68.94
-528.0       14.29
-529.0       2.1
-530.0       0.2
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/require_out.msp	Mon Jan 15 12:26:55 2024 +0000
@@ -0,0 +1,27 @@
+FORMULA: C34H54O4
+CASNO: 2000774-54-3
+ID: 36905
+COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353|
+COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate
+PARENT_MASS: 526.40221
+RETENTION_INDEX: 3353.0
+PUBCHEMID: 236415
+NOMINAL_MASS: 526
+INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1
+SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12
+NUM PEAKS: 14
+189.0       419.62
+203.0       249.77
+216.0       149.86
+262.0       79.93
+276.0       49.95
+393.0       149.86
+423.0       219.8
+453.0       179.84
+466.0       999.0
+526.0       179.84
+527.0       68.94
+528.0       14.29
+529.0       2.1
+530.0       0.2
+
--- a/test-data/filtering/require_smiles_out.msp	Thu Dec 14 13:43:50 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-FORMULA: C13H9ClFeO4Si
-CASNO: 2000570-99-8
-ID: 2011
-COMMENT: SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not real!
-COMPOUND_NAME: ((.eta.5-Cyclopentadienylironbiscarbonyl)(1,2-phenylenedioxysilyl)chloride complex
-PARENT_MASS: 347.930801
-PUBCHEMID: 10970124
-NOMINAL_MASS: 348
-SMILES: Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe]
-NUM PEAKS: 3
-292.0       999.0
-314.0       118.89
-348.0       734.24
-
-FORMULA: C34H54O4
-CASNO: 2000774-54-3
-ID: 36905
-COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353|
-COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate
-PARENT_MASS: 526.40221
-RETENTION_INDEX: 3353.0
-PUBCHEMID: 236415
-NOMINAL_MASS: 526
-INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1
-SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12
-NUM PEAKS: 14
-189.0       419.62
-203.0       249.77
-216.0       149.86
-262.0       79.93
-276.0       49.95
-393.0       149.86
-423.0       219.8
-453.0       179.84
-466.0       999.0
-526.0       179.84
-527.0       68.94
-528.0       14.29
-529.0       2.1
-530.0       0.2
-