Previous changeset 19:c5076cc15e8c (2024-04-22) Next changeset 21:07cac38cbd29 (2024-06-04) |
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7 |
modified:
help.xml macros.xml matchms_filtering.xml matchms_filtering_wrapper.py test-data/metadata_match/formula.json test-data/metadata_match/inchi_key_inner.json test-data/metadata_match/smiles.json |
added:
test-data/remove_spectra/require_compound_name.msp test-data/remove_spectra/require_filter.msp test-data/remove_spectra/require_formula.msp test-data/remove_spectra/require_inchi.msp test-data/remove_spectra/require_inchi_removed_spectra.msp test-data/remove_spectra/require_inchikey.msp test-data/remove_spectra/require_precursor_mz.msp test-data/remove_spectra/require_retention_index.msp test-data/remove_spectra/require_retention_time.msp test-data/remove_spectra/require_smiles.msp |
removed:
test-data/filtering/require_filter.msp test-data/filtering/require_out.msp |
b |
diff -r c5076cc15e8c -r 86d265d2a334 help.xml --- a/help.xml Mon Apr 22 08:39:32 2024 +0000 +++ b/help.xml Thu May 30 18:08:27 2024 +0000 |
b |
@@ -6,11 +6,6 @@ For detailed documentation on the tool, see https://github.com/matchms/matchms/blob/master/README.rst and https://matchms.readthedocs.io/en/latest/ for the Python API. - To get more familiar with the library, there is a `tutorial`_ available which explains how to build a mass spectrometry data processing - pipeline with matchms. - - .. _tutorial: https://medium.com/escience-center/build-your-own-mass-spectrometry-analysis-pipeline-in-python-using-matchms-part-i-d96c718c68ee - Overview .. image:: https://github.com/RECETOX/galaxytools/raw/29e54e69dab6ab1263f56d35ea19f5d7f284d728/tools/matchms/images/matchms_galaxytools.png :width: 3120 @@ -55,4 +50,4 @@ +----------+-----------+---------+--------+ ]]> </token> -</macros> \ No newline at end of file +</macros> |
b |
diff -r c5076cc15e8c -r 86d265d2a334 macros.xml --- a/macros.xml Mon Apr 22 08:39:32 2024 +0000 +++ b/macros.xml Thu May 30 18:08:27 2024 +0000 |
b |
@@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">0.24.0</token> + <token name="@TOOL_VERSION@">0.25.0</token> <xml name="creator"> <creator> |
b |
diff -r c5076cc15e8c -r 86d265d2a334 matchms_filtering.xml --- a/matchms_filtering.xml Mon Apr 22 08:39:32 2024 +0000 +++ b/matchms_filtering.xml Thu May 30 18:08:27 2024 +0000 |
b |
@@ -1,4 +1,4 @@ -<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy2" profile="21.09"> +<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy1" profile="21.09"> <description>filter and normalize mass spectrometry data</description> <macros> @@ -49,12 +49,6 @@ --from_mz "$mz_range.from_mz" \ --to_mz "$mz_range.to_mz" \ #end if - #if $require_smiles_is_true == "TRUE" - -require_smiles \ - #end if - #if $require_inchi_is_true == "TRUE" - -require_inchi \ - #end if #if $derive_precursor_mz_from_parent_mass.is_true == "TRUE" -derive_precursor_mz_from_parent_mass \ --estimate_from_adduct "${derive_precursor_mz_from_parent_mass.estimate_from_adduct}" \ @@ -102,12 +96,6 @@ <when value="FALSE"></when> </conditional> - <param name="require_smiles_is_true" label="Require SMILES" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" - help="Remove spectra that does not contain SMILES." /> - - <param name="require_inchi_is_true" label="Require INCHI" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" - help="Remove spectra that does not contain INCHI." /> - <conditional name="derive_precursor_mz_from_parent_mass"> <param name="is_true" label="Derive precursor_mz from parent_mass" type="select" help="Derives the precursor_mz from the parent mass and adduct or charge."> @@ -173,16 +161,6 @@ <output name="output" file="filtering/mz_range.msp" ftype="msp"/> </test> <test> - <param name="spectra" value="filtering/require_filter.msp" ftype="msp"/> - <param name="require_smiles_is_true" value="TRUE"/> - <output name="output" file="filtering/require_out.msp" ftype="msp"/> - </test> - <test> - <param name="spectra" value="filtering/require_filter.msp" ftype="msp"/> - <param name="require_inchi_is_true" value="TRUE"/> - <output name="output" file="filtering/require_out.msp" ftype="msp"/> - </test> - <test> <param name="spectra" value="filtering/input.msp" ftype="msp"/> <section name="reduce_to_top_n_peaks"> <param name="is_true" value="TRUE"/> |
b |
diff -r c5076cc15e8c -r 86d265d2a334 matchms_filtering_wrapper.py --- a/matchms_filtering_wrapper.py Mon Apr 22 08:39:32 2024 +0000 +++ b/matchms_filtering_wrapper.py Thu May 30 18:08:27 2024 +0000 |
b |
@@ -2,23 +2,14 @@ import sys from matchms.exporting import save_as_mgf, save_as_msp -from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\ +from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz, \ add_retention_index, add_retention_time, clean_compound_name from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \ select_by_relative_intensity from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass -from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import is_valid_inchi, is_valid_smiles from matchms.importing import load_from_mgf, load_from_msp -def require_key(spectrum, key, function): - value = spectrum.get(key) - if function(value): - return spectrum - - return None - - def main(argv): parser = argparse.ArgumentParser(description="Compute MSP similarity scores") parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.") @@ -38,10 +29,6 @@ help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).") parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter") parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter") - parser.add_argument("-require_smiles", action='store_true', - help="Remove spectra that does not contain SMILES.") - parser.add_argument("-require_inchi", action='store_true', - help="Remove spectra that does not contain INCHI.") parser.add_argument("-derive_precursor_mz_from_parent_mass", action='store_true', help="Derives the precursor_mz from the parent mass and adduct or charge.") parser.add_argument("--estimate_from_adduct", type=str, help="estimate from adduct.") @@ -55,8 +42,6 @@ or args.clean_metadata or args.relative_intensity or args.mz_range - or args.require_smiles - or args.require_inchi or args.derive_precursor_mz_from_parent_mass or args.reduce_to_top_n_peaks): raise ValueError('No filter selected.') @@ -96,12 +81,6 @@ precursor_mz = derive_precursor_mz_from_parent_mass(spectrum, args.estimate_from_adduct) spectrum.set("precursor_mz", precursor_mz) - if args.require_smiles and spectrum is not None: - spectrum = require_key(spectrum, "smiles", is_valid_smiles) - - if args.require_inchi and spectrum is not None: - spectrum = require_key(spectrum, "inchi", is_valid_inchi) - if spectrum is not None: filtered_spectra.append(spectrum) |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/filtering/require_filter.msp --- a/test-data/filtering/require_filter.msp Mon Apr 22 08:39:32 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,72 +0,0 @@ -FORMULA: C13H9ClFeO4Si -CASNO: 2000570-99-8 -ID: 2011 -COMMENT: SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not real! -COMPOUND_NAME: ((.eta.5-Cyclopentadienylironbiscarbonyl)(1,2-phenylenedioxysilyl)chloride complex -PARENT_MASS: 347.930801 -PUBCHEMID: 10970124 -NOMINAL_MASS: 348 -SMILES: nan -NUM PEAKS: 3 -292.0 999.0 -314.0 118.89 -348.0 734.24 - -FORMULA: C13H14O -CASNO: 2000130-22-2 -ID: 7198 -COMMENT: SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588| -COMPOUND_NAME: ((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol -PARENT_MASS: 186.1044655 -RETENTION_INDEX: 1588.0 -PUBCHEMID: 130762197 -NOMINAL_MASS: 186 -INCHI: nan -NUM PEAKS: 20 -51.0 89.92 -63.0 89.92 -77.0 179.84 -88.0 39.96 -89.0 59.95 -91.0 49.95 -102.0 149.86 -113.0 49.95 -115.0 229.79 -127.0 139.87 -128.0 999.0 -129.0 199.82 -144.0 99.91 -155.0 119.89 -156.0 14.89 -157.0 1.1 -158.0 0.1 -186.0 39.96 -187.0 5.89 -188.0 0.5 - -FORMULA: C34H54O4 -CASNO: 2000774-54-3 -ID: 36905 -COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| -COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate -PARENT_MASS: 526.40221 -RETENTION_INDEX: 3353.0 -PUBCHEMID: 236415 -NOMINAL_MASS: 526 -INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 -SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 -NUM PEAKS: 14 -189.0 419.62 -203.0 249.77 -216.0 149.86 -262.0 79.93 -276.0 49.95 -393.0 149.86 -423.0 219.8 -453.0 179.84 -466.0 999.0 -526.0 179.84 -527.0 68.94 -528.0 14.29 -529.0 2.1 -530.0 0.2 \ No newline at end of file |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/filtering/require_out.msp --- a/test-data/filtering/require_out.msp Mon Apr 22 08:39:32 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,27 +0,0 @@ -FORMULA: C34H54O4 -CASNO: 2000774-54-3 -ID: 36905 -COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| -COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate -PARENT_MASS: 526.40221 -RETENTION_INDEX: 3353.0 -PUBCHEMID: 236415 -NOMINAL_MASS: 526 -INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 -SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 -NUM PEAKS: 14 -189.0 419.62 -203.0 249.77 -216.0 149.86 -262.0 79.93 -276.0 49.95 -393.0 149.86 -423.0 219.8 -453.0 179.84 -466.0 999.0 -526.0 179.84 -527.0 68.94 -528.0 14.29 -529.0 2.1 -530.0 0.2 - |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/metadata_match/formula.json --- a/test-data/metadata_match/formula.json Mon Apr 22 08:39:32 2024 +0000 +++ b/test-data/metadata_match/formula.json Thu May 30 18:08:27 2024 +0000 |
[ |
b'@@ -1,1 +1,1 @@\n-{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "YNPNZTXNASCQKK-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"74.01508": "Theoretical m/z 74.01565, Mass diff 0 (0 ppm), Formula C6H2", "75.02295": "Theoretical m/z 75.023475, Mass diff 0 (0 ppm), Formula C6H3", "76.03075": "Theoretical m/z 76.0313, Mass diff 0 (0 ppm), Formula C6H4", "87.02295": "Theoretical m/z 87.023475, Mass diff 0 (0 ppm), Formula C7H3", "88.03076": "Theoretical m/z 88.0313, Mass diff 0 (0 ppm), Formula C7H4", "89.03857": "Theoretical m/z 89.039125, Mass diff 0 (0 ppm), Formula C7H5", "151.05415": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "175.05423": "Theoretical m/z 175.054775, Mass diff 0 (0 ppm), Formula C14H7", "176.062": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06982": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.0775": "Theoretical m/z 178.077698, Mass diff 0 (1.11 ppm), SMILES C=1C=CC2=C(C1)C=CC=3C=CC=CC32, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "19", "compound_name": "Phenanthrene", "retention_time": null, "retention_index": 1832.9, "precursor_mz": 178.0775, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.01508, 137808.0], [75.02295, 278714.0], [76.03075, 608417.0], [87.02295, 304266.0], [88.03076, 497050.0], [89.03857, 441168.0], [98.01511, 150478.0], [150.04633, 868927.0], [151.05415, 546351.0], [152.0619, 2275502.0], [153.06528, 276320.0], [169.06468, 272559.0], [174.04636, 365846.0], [175.05423, 272039.0], [176.062, 3370523.0], [177.06982, 1751846.0], [178.0775, 13724432.0], [179.08078, 2250119.0], [180.08412, 138203.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "MWPLVEDNUUSJAV-UHFFFAOYSA-N", "smiles": "C1=CC2=CC3=C(C=CC=C3)C=C2C=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"74.0151": "Theoretical m/z 74.01565, Mass diff 0 (0 ppm), Formula C6H2", "75.02296": "Theoretical m/z 75.023475, Mass diff 0 (0 ppm), Formula C6H3", "76.03077": "Theoretical m/z 76.0313, Mass diff 0 (0 ppm), Formula C6H4", "87.02297": "Theoretical m/z 87.023475, Mass diff 0 (0 ppm), Formula C7H3", "88.03078": "Theoretical m/z 88.0313, Mass diff 0 (0 ppm), Formula C7H4", "89.0386": "Theoretical m/z 89.039125, Mass diff 0 (0 ppm), Formula C7H5", "91.05425": "Theoretical m/z 91.054775, Mass diff 0 (0 ppm), Formula C7H7", "151.05415": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "175.05423": "Theoretical m/z 175.054775, Mass diff 0 (0 ppm), Formula C14H7", "176.06204": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06984": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.07754": "Theoretical m/z 178.077698, Mass diff 0 (0.89 ppm), SMILES C=1C=CC=2C=C3C=CC=CC3=CC2C1, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "21", "compound_name": "Anthracene", "retention_time": null, "retention_index": 1844.4, "precursor_mz": 178.07754, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.0151, 117371.0], [75.02296, 245305.0], [76.03077, 632686.0], [87.02297, 236214.0], [88.03078, 347832.0], [89.0386, 507288.0], [91.05425, 130861.0], [126.04636, 128356.0], [149.04478, 126331.0], [150.04637, 641829.0], [151.05415, 403195.0], [152.06195, 1369833.0], [153.06534, 175290.0], [169.06471, 228827.0], [174.04639, 255716.0], [175.05423, 198784.0], [176.06204, 2563053.0], [177.06984, 1182037.0], [178.07754, 11002398.0], [179.080'..b'9.0], [213.06973, 256631.0], [215.08548, 1220866.0], [216.08881, 217320.0], [224.062, 181393.0], [226.07771, 1503774.0], [227.08588, 1198323.0], [228.09344, 2885510.0], [229.10123, 1791323.0], [230.10886, 17890056.0], [231.11214, 3536976.0], [232.11542, 320231.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10S", "inchikey": "YEUHHUCOSQOCIX-UHFFFAOYSA-N", "smiles": "S1C2=C(C=CC=C2)C2=C1C1=CC=CC=C1C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"91.05425": "Theoretical m/z 91.054775, Mass diff 0 (0 ppm), Formula C7H7", "163.05408": "Theoretical m/z 163.054775, Mass diff 0 (0 ppm), Formula C13H7", "187.05406": "Theoretical m/z 187.054775, Mass diff 0 (0 ppm), Formula C15H7", "189.06969": "Theoretical m/z 189.070425, Mass diff 0 (0 ppm), Formula C15H9", "201.06973": "Theoretical m/z 201.070425, Mass diff 0 (0 ppm), Formula C16H9", "233.04245": "Theoretical m/z 233.042496, Mass diff -0.001 (0 ppm), Formula C16H9S", "234.04965": "Theoretical m/z 234.049775, Mass diff 0 (0.53 ppm), SMILES S1C=2C=CC=CC2C=3C=CC=4C=CC=CC4C13, Annotation [C16H10S]+, Rule of HR False"}, "num_peaks": "23", "compound_name": "Benzo[b]naphtho[2,1-d]thiophene", "retention_time": null, "retention_index": 2419.3, "precursor_mz": 234.04965, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[91.05425, 49130.0], [93.52686, 61653.0], [94.53469, 76638.0], [104.01679, 126871.0], [116.01675, 112547.0], [117.02459, 99108.0], [162.04623, 54491.0], [163.05408, 136418.0], [164.06198, 56321.0], [187.05406, 273349.0], [188.06189, 200750.0], [189.06969, 453336.0], [190.07306, 77884.0], [200.06189, 135620.0], [201.06973, 74692.0], [202.07758, 425058.0], [203.08086, 72058.0], [226.07762, 63460.0], [232.03409, 769638.0], [233.04245, 382457.0], [234.04965, 4404102.0], [235.05304, 725574.0], [236.04547, 251450.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C17H12", "inchikey": "HAPOJKSPCGLOOD-UHFFFAOYSA-N", "smiles": "C1C2=CC=CC=C2C2=C1C=C1C=CC=CC1=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"163.05414": "Theoretical m/z 163.054775, Mass diff 0 (0 ppm), Formula C13H7", "187.05412": "Theoretical m/z 187.054775, Mass diff 0 (0 ppm), Formula C15H7", "189.06975": "Theoretical m/z 189.070425, Mass diff 0 (0 ppm), Formula C15H9", "211.05426": "Theoretical m/z 211.054775, Mass diff 0 (0 ppm), Formula C17H7", "213.0699": "Theoretical m/z 213.070425, Mass diff 0 (0 ppm), Formula C17H9", "215.0855": "Theoretical m/z 215.086075, Mass diff 0 (0 ppm), Formula C17H11", "216.09326": "Theoretical m/z 216.093354, Mass diff 0 (0.44 ppm), SMILES C=1C=CC=2C=C3C(=CC2C1)C=4C=CC=CC4C3, Annotation [C17H12]+, Rule of HR False"}, "num_peaks": "21", "compound_name": "2,3-Benzofluorene", "retention_time": null, "retention_index": 2257.5, "precursor_mz": 216.09326, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[93.52689, 122035.0], [94.53471, 241743.0], [95.03638, 42130.0], [105.52688, 47526.0], [106.53471, 171653.0], [107.03639, 60004.0], [107.5425, 180288.0], [108.03385, 97471.0], [163.05414, 68194.0], [187.05412, 167183.0], [188.06201, 86262.0], [189.06975, 239421.0], [190.07321, 37778.0], [211.05426, 117044.0], [212.06215, 60813.0], [213.0699, 808499.0], [214.07317, 202222.0], [215.0855, 3634570.0], [216.09326, 2535030.0], [217.09671, 444170.0], [218.09999, 37976.0]]}], "n_row": 7, "n_col": 7, "row": [0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 6], "col": [0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 6], "data": [[true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true]], "dtype": [["MetadataMatch_formula_equal_match_f0", "|b1"]]}\n\\ No newline at end of file\n' |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/metadata_match/inchi_key_inner.json --- a/test-data/metadata_match/inchi_key_inner.json Mon Apr 22 08:39:32 2024 +0000 +++ b/test-data/metadata_match/inchi_key_inner.json Thu May 30 18:08:27 2024 +0000 |
[ |
b'@@ -1,1 +1,1 @@\n-{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "YNPNZTXNASCQKK-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "19", "compound_name": "Phenanthrene", "retention_time": null, "retention_index": 1832.9, "precursor_mz": 178.0775, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.01508, 137808.0], [75.02295, 278714.0], [76.03075, 608417.0], [87.02295, 304266.0], [88.03076, 497050.0], [89.03857, 441168.0], [98.01511, 150478.0], [150.04633, 868927.0], [151.05415, 546351.0], [152.0619, 2275502.0], [153.06528, 276320.0], [169.06468, 272559.0], [174.04636, 365846.0], [175.05423, 272039.0], [176.062, 3370523.0], [177.06982, 1751846.0], [178.0775, 13724432.0], [179.08078, 2250119.0], [180.08412, 138203.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "MWPLVEDNUUSJAV-UHFFFAOYSA-N", "smiles": "C1=CC2=CC3=C(C=CC=C3)C=C2C=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "21", "compound_name": "Anthracene", "retention_time": null, "retention_index": 1844.4, "precursor_mz": 178.07754, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.0151, 117371.0], [75.02296, 245305.0], [76.03077, 632686.0], [87.02297, 236214.0], [88.03078, 347832.0], [89.0386, 507288.0], [91.05425, 130861.0], [126.04636, 128356.0], [149.04478, 126331.0], [150.04637, 641829.0], [151.05415, 403195.0], [152.06195, 1369833.0], [153.06534, 175290.0], [169.06471, 228827.0], [174.04639, 255716.0], [175.05423, 198784.0], [176.06204, 2563053.0], [177.06984, 1182037.0], [178.07754, 11002398.0], [179.08081, 1778803.0], [180.08418, 132922.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10", "inchikey": "GVEPBJHOBDJJJI-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C3C2=CC=CC3=CC=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "17", "compound_name": "Fluoranthene", "retention_time": null, "retention_index": 2102.7, "precursor_mz": 202.07756, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[75.02299, 112456.0], [87.02298, 183640.0], [88.03079, 367434.0], [99.02296, 124952.0], [100.03078, 376079.0], [101.03863, 381288.0], [150.04642, 86059.0], [174.04634, 246963.0], [176.06194, 141676.0], [198.0464, 244370.0], [199.05429, 285767.0], [200.06207, 1958890.0], [201.06982, 1103710.0], [202.07756, 8104188.0], [203.08084, 1377015.0], [204.08421, 98067.0], [219.08043, 186623.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10", "inchikey": "BBEAQIROQSPTKN-UHFFFAOYSA-N", "smiles": "C1=CC2=C3C(C=CC4=CC=CC(C=C2)=C34)=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "16", "compound_name": "Pyrene", "retention_time": null, "retention_index": 2154.5, "precursor_mz": 202.07759, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[87.02299, 147113.0], [88.0308, 305149.0], [99.02298, 137042.0], [100.0308, 508914.0], [101.03864, 472094.0], [101.54032, 82430.0], [150.04637, 80741.0], [174.04631, 212706.0], [198.04643, 262925.0], [199.05429, 295144.0], [200.06209, 1960712.0], [201.06982, 1270466.0], [202.'..b'[100.0308, 508914.0], [101.03864, 472094.0], [101.54032, 82430.0], [150.04637, 80741.0], [174.04631, 212706.0], [198.04643, 262925.0], [199.05429, 295144.0], [200.06209, 1960712.0], [201.06982, 1270466.0], [202.07759, 7974712.0], [203.08086, 1335645.0], [204.0843, 121774.0], [219.08044, 182944.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C18H14", "inchikey": "XJKSTNDFUHDPQJ-UHFFFAOYSA-N", "smiles": "C1=CC=C(C=C1)C2=CC=C(C=C2)C3=CC=CC=C3", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "25", "compound_name": "para-Terphenyl", "retention_time": null, "retention_index": 2207.5, "precursor_mz": 230.10886, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[88.03077, 233962.0], [101.03861, 353544.0], [113.03854, 437845.0], [115.05423, 690291.0], [128.062, 293046.0], [151.05412, 266397.0], [152.06195, 961184.0], [153.06976, 409346.0], [176.06192, 232793.0], [189.06972, 408734.0], [200.0619, 342984.0], [201.06986, 238974.0], [202.07758, 1292332.0], [203.08093, 374439.0], [213.06973, 256631.0], [215.08548, 1220866.0], [216.08881, 217320.0], [224.062, 181393.0], [226.07771, 1503774.0], [227.08588, 1198323.0], [228.09344, 2885510.0], [229.10123, 1791323.0], [230.10886, 17890056.0], [231.11214, 3536976.0], [232.11542, 320231.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10S", "inchikey": "YEUHHUCOSQOCIX-UHFFFAOYSA-N", "smiles": "S1C2=C(C=CC=C2)C2=C1C1=CC=CC=C1C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "23", "compound_name": "Benzo[b]naphtho[2,1-d]thiophene", "retention_time": null, "retention_index": 2419.3, "precursor_mz": 234.04965, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[91.05425, 49130.0], [93.52686, 61653.0], [94.53469, 76638.0], [104.01679, 126871.0], [116.01675, 112547.0], [117.02459, 99108.0], [162.04623, 54491.0], [163.05408, 136418.0], [164.06198, 56321.0], [187.05406, 273349.0], [188.06189, 200750.0], [189.06969, 453336.0], [190.07306, 77884.0], [200.06189, 135620.0], [201.06973, 74692.0], [202.07758, 425058.0], [203.08086, 72058.0], [226.07762, 63460.0], [232.03409, 769638.0], [233.04245, 382457.0], [234.04965, 4404102.0], [235.05304, 725574.0], [236.04547, 251450.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C17H12", "inchikey": "HAPOJKSPCGLOOD-UHFFFAOYSA-N", "smiles": "C1C2=CC=CC=C2C2=C1C=C1C=CC=CC1=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "21", "compound_name": "2,3-Benzofluorene", "retention_time": null, "retention_index": 2257.5, "precursor_mz": 216.09326, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[93.52689, 122035.0], [94.53471, 241743.0], [95.03638, 42130.0], [105.52688, 47526.0], [106.53471, 171653.0], [107.03639, 60004.0], [107.5425, 180288.0], [108.03385, 97471.0], [163.05414, 68194.0], [187.05412, 167183.0], [188.06201, 86262.0], [189.06975, 239421.0], [190.07321, 37778.0], [211.05426, 117044.0], [212.06215, 60813.0], [213.0699, 808499.0], [214.07317, 202222.0], [215.0855, 3634570.0], [216.09326, 2535030.0], [217.09671, 444170.0], [218.09999, 37976.0]]}], "n_row": 7, "n_col": 7, "row": [0, 1, 2, 3, 4, 5, 6], "col": [0, 1, 2, 3, 4, 5, 6], "data": [[true, true], [true, true], [true, true], [true, true], [true, true], [true, true], [true, true]], "dtype": [["MetadataMatch_smiles_equal_match_f0", "|b1"], ["MetadataMatch_inchikey_equal_match_f0", "|b1"]]}\n\\ No newline at end of file\n' |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/metadata_match/smiles.json --- a/test-data/metadata_match/smiles.json Mon Apr 22 08:39:32 2024 +0000 +++ b/test-data/metadata_match/smiles.json Thu May 30 18:08:27 2024 +0000 |
[ |
b'@@ -1,1 +1,1 @@\n-{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "YNPNZTXNASCQKK-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"74.01508": "Theoretical m/z 74.01565, Mass diff 0 (0 ppm), Formula C6H2", "75.02295": "Theoretical m/z 75.023475, Mass diff 0 (0 ppm), Formula C6H3", "76.03075": "Theoretical m/z 76.0313, Mass diff 0 (0 ppm), Formula C6H4", "87.02295": "Theoretical m/z 87.023475, Mass diff 0 (0 ppm), Formula C7H3", "88.03076": "Theoretical m/z 88.0313, Mass diff 0 (0 ppm), Formula C7H4", "89.03857": "Theoretical m/z 89.039125, Mass diff 0 (0 ppm), Formula C7H5", "151.05415": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "175.05423": "Theoretical m/z 175.054775, Mass diff 0 (0 ppm), Formula C14H7", "176.062": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06982": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.0775": "Theoretical m/z 178.077698, Mass diff 0 (1.11 ppm), SMILES C=1C=CC2=C(C1)C=CC=3C=CC=CC32, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "19", "compound_name": "Phenanthrene", "retention_time": null, "retention_index": 1832.9, "precursor_mz": 178.0775, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.01508, 137808.0], [75.02295, 278714.0], [76.03075, 608417.0], [87.02295, 304266.0], [88.03076, 497050.0], [89.03857, 441168.0], [98.01511, 150478.0], [150.04633, 868927.0], [151.05415, 546351.0], [152.0619, 2275502.0], [153.06528, 276320.0], [169.06468, 272559.0], [174.04636, 365846.0], [175.05423, 272039.0], [176.062, 3370523.0], [177.06982, 1751846.0], [178.0775, 13724432.0], [179.08078, 2250119.0], [180.08412, 138203.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "MWPLVEDNUUSJAV-UHFFFAOYSA-N", "smiles": "C1=CC2=CC3=C(C=CC=C3)C=C2C=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"74.0151": "Theoretical m/z 74.01565, Mass diff 0 (0 ppm), Formula C6H2", "75.02296": "Theoretical m/z 75.023475, Mass diff 0 (0 ppm), Formula C6H3", "76.03077": "Theoretical m/z 76.0313, Mass diff 0 (0 ppm), Formula C6H4", "87.02297": "Theoretical m/z 87.023475, Mass diff 0 (0 ppm), Formula C7H3", "88.03078": "Theoretical m/z 88.0313, Mass diff 0 (0 ppm), Formula C7H4", "89.0386": "Theoretical m/z 89.039125, Mass diff 0 (0 ppm), Formula C7H5", "91.05425": "Theoretical m/z 91.054775, Mass diff 0 (0 ppm), Formula C7H7", "151.05415": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "175.05423": "Theoretical m/z 175.054775, Mass diff 0 (0 ppm), Formula C14H7", "176.06204": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06984": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.07754": "Theoretical m/z 178.077698, Mass diff 0 (0.89 ppm), SMILES C=1C=CC=2C=C3C=CC=CC3=CC2C1, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "21", "compound_name": "Anthracene", "retention_time": null, "retention_index": 1844.4, "precursor_mz": 178.07754, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.0151, 117371.0], [75.02296, 245305.0], [76.03077, 632686.0], [87.02297, 236214.0], [88.03078, 347832.0], [89.0386, 507288.0], [91.05425, 130861.0], [126.04636, 128356.0], [149.04478, 126331.0], [150.04637, 641829.0], [151.05415, 403195.0], [152.06195, 1369833.0], [153.06534, 175290.0], [169.06471, 228827.0], [174.04639, 255716.0], [175.05423, 198784.0], [176.06204, 2563053.0], [177.06984, 1182037.0], [178.07754, 11002398.0], [179.080'..b'986, 238974.0], [202.07758, 1292332.0], [203.08093, 374439.0], [213.06973, 256631.0], [215.08548, 1220866.0], [216.08881, 217320.0], [224.062, 181393.0], [226.07771, 1503774.0], [227.08588, 1198323.0], [228.09344, 2885510.0], [229.10123, 1791323.0], [230.10886, 17890056.0], [231.11214, 3536976.0], [232.11542, 320231.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10S", "inchikey": "YEUHHUCOSQOCIX-UHFFFAOYSA-N", "smiles": "S1C2=C(C=CC=C2)C2=C1C1=CC=CC=C1C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"91.05425": "Theoretical m/z 91.054775, Mass diff 0 (0 ppm), Formula C7H7", "163.05408": "Theoretical m/z 163.054775, Mass diff 0 (0 ppm), Formula C13H7", "187.05406": "Theoretical m/z 187.054775, Mass diff 0 (0 ppm), Formula C15H7", "189.06969": "Theoretical m/z 189.070425, Mass diff 0 (0 ppm), Formula C15H9", "201.06973": "Theoretical m/z 201.070425, Mass diff 0 (0 ppm), Formula C16H9", "233.04245": "Theoretical m/z 233.042496, Mass diff -0.001 (0 ppm), Formula C16H9S", "234.04965": "Theoretical m/z 234.049775, Mass diff 0 (0.53 ppm), SMILES S1C=2C=CC=CC2C=3C=CC=4C=CC=CC4C13, Annotation [C16H10S]+, Rule of HR False"}, "num_peaks": "23", "compound_name": "Benzo[b]naphtho[2,1-d]thiophene", "retention_time": null, "retention_index": 2419.3, "precursor_mz": 234.04965, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[91.05425, 49130.0], [93.52686, 61653.0], [94.53469, 76638.0], [104.01679, 126871.0], [116.01675, 112547.0], [117.02459, 99108.0], [162.04623, 54491.0], [163.05408, 136418.0], [164.06198, 56321.0], [187.05406, 273349.0], [188.06189, 200750.0], [189.06969, 453336.0], [190.07306, 77884.0], [200.06189, 135620.0], [201.06973, 74692.0], [202.07758, 425058.0], [203.08086, 72058.0], [226.07762, 63460.0], [232.03409, 769638.0], [233.04245, 382457.0], [234.04965, 4404102.0], [235.05304, 725574.0], [236.04547, 251450.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C17H12", "inchikey": "HAPOJKSPCGLOOD-UHFFFAOYSA-N", "smiles": "C1C2=CC=CC=C2C2=C1C=C1C=CC=CC1=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"163.05414": "Theoretical m/z 163.054775, Mass diff 0 (0 ppm), Formula C13H7", "187.05412": "Theoretical m/z 187.054775, Mass diff 0 (0 ppm), Formula C15H7", "189.06975": "Theoretical m/z 189.070425, Mass diff 0 (0 ppm), Formula C15H9", "211.05426": "Theoretical m/z 211.054775, Mass diff 0 (0 ppm), Formula C17H7", "213.0699": "Theoretical m/z 213.070425, Mass diff 0 (0 ppm), Formula C17H9", "215.0855": "Theoretical m/z 215.086075, Mass diff 0 (0 ppm), Formula C17H11", "216.09326": "Theoretical m/z 216.093354, Mass diff 0 (0.44 ppm), SMILES C=1C=CC=2C=C3C(=CC2C1)C=4C=CC=CC4C3, Annotation [C17H12]+, Rule of HR False"}, "num_peaks": "21", "compound_name": "2,3-Benzofluorene", "retention_time": null, "retention_index": 2257.5, "precursor_mz": 216.09326, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[93.52689, 122035.0], [94.53471, 241743.0], [95.03638, 42130.0], [105.52688, 47526.0], [106.53471, 171653.0], [107.03639, 60004.0], [107.5425, 180288.0], [108.03385, 97471.0], [163.05414, 68194.0], [187.05412, 167183.0], [188.06201, 86262.0], [189.06975, 239421.0], [190.07321, 37778.0], [211.05426, 117044.0], [212.06215, 60813.0], [213.0699, 808499.0], [214.07317, 202222.0], [215.0855, 3634570.0], [216.09326, 2535030.0], [217.09671, 444170.0], [218.09999, 37976.0]]}], "n_row": 7, "n_col": 7, "row": [0, 1, 2, 3, 4, 5, 6], "col": [0, 1, 2, 3, 4, 5, 6], "data": [[true], [true], [true], [true], [true], [true], [true]], "dtype": [["MetadataMatch_smiles_equal_match_f0", "|b1"]]}\n\\ No newline at end of file\n' |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/remove_spectra/require_compound_name.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_spectra/require_compound_name.msp Thu May 30 18:08:27 2024 +0000 |
[ |
@@ -0,0 +1,106 @@ +FORMULA: C13H14O +CASNO: 2000130-22-2 +ID: 7198 +COMMENT: SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588| +COMPOUND_NAME: ((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol +PARENT_MASS: 186.1044655 +RETENTION_INDEX: 1588.0 +PUBCHEMID: 130762197 +NOMINAL_MASS: 186 +INCHI: nan +NUM PEAKS: 20 +51.0 89.92 +63.0 89.92 +77.0 179.84 +88.0 39.96 +89.0 59.95 +91.0 49.95 +102.0 149.86 +113.0 49.95 +115.0 229.79 +127.0 139.87 +128.0 999.0 +129.0 199.82 +144.0 99.91 +155.0 119.89 +156.0 14.89 +157.0 1.1 +158.0 0.1 +186.0 39.96 +187.0 5.89 +188.0 0.5 + +FORMULA: C34H54O4 +CASNO: 2000774-54-3 +ID: 36905 +COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| +COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate +PARENT_MASS: 526.40221 +RETENTION_INDEX: 3353.0 +PUBCHEMID: 236415 +NOMINAL_MASS: 526 +INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +NUM PEAKS: 14 +189.0 419.62 +203.0 249.77 +216.0 149.86 +262.0 79.93 +276.0 49.95 +393.0 149.86 +423.0 219.8 +453.0 179.84 +466.0 999.0 +526.0 179.84 +527.0 68.94 +528.0 14.29 +529.0 2.1 +530.0 0.2 + +IONMODE: negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C001 +RETENTION_TIME: 38.74 +RETENTION_INDEX: None +NUM PEAKS: 16 +138.9121 10186226.0 +175.0641 26780143.0 +196.8658 21390430.0 +198.8647 21688594.0 +206.9034 26130980.0 +254.8252 23747536.0 +258.8237 15532799.0 +266.8652 9805546.0 +312.7841 10051801.0 +316.7777 10734168.0 +372.7383 19374863.0 +382.8218 12815572.0 +392.7685 10913351.0 +434.7287 9943329.0 +440.7322 10603010.0 +488.6825 12267966.0 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/remove_spectra/require_filter.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_spectra/require_filter.msp Thu May 30 18:08:27 2024 +0000 |
[ |
@@ -0,0 +1,117 @@ +CASNO: 2000570-99-8 +ID: 2011 +COMMENT: SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not real! +PARENT_MASS: 347.930801 +PUBCHEMID: 10970124 +NOMINAL_MASS: 348 +SMILES: nan +NUM PEAKS: 3 +292.0 999.0 +314.0 118.89 +348.0 734.24 + +FORMULA: C13H14O +CASNO: 2000130-22-2 +ID: 7198 +COMMENT: SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588| +COMPOUND_NAME: ((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol +PARENT_MASS: 186.1044655 +RETENTION_INDEX: 1588.0 +PUBCHEMID: 130762197 +NOMINAL_MASS: 186 +INCHI: nan +NUM PEAKS: 20 +51.0 89.92 +63.0 89.92 +77.0 179.84 +88.0 39.96 +89.0 59.95 +91.0 49.95 +102.0 149.86 +113.0 49.95 +115.0 229.79 +127.0 139.87 +128.0 999.0 +129.0 199.82 +144.0 99.91 +155.0 119.89 +156.0 14.89 +157.0 1.1 +158.0 0.1 +186.0 39.96 +187.0 5.89 +188.0 0.5 + +FORMULA: C34H54O4 +CASNO: 2000774-54-3 +ID: 36905 +COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| +COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate +PARENT_MASS: 526.40221 +RETENTION_INDEX: 3353.0 +PUBCHEMID: 236415 +NOMINAL_MASS: 526 +INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +NUM PEAKS: 14 +189.0 419.62 +203.0 249.77 +216.0 149.86 +262.0 79.93 +276.0 49.95 +393.0 149.86 +423.0 219.8 +453.0 179.84 +466.0 999.0 +526.0 179.84 +527.0 68.94 +528.0 14.29 +529.0 2.1 +530.0 0.2 + +IONMODE: negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C001 +RETENTION_TIME: 38.74 +RETENTION_INDEX: None +NUM PEAKS: 16 +138.9121 10186226.0 +175.0641 26780143.0 +196.8658 21390430.0 +198.8647 21688594.0 +206.9034 26130980.0 +254.8252 23747536.0 +258.8237 15532799.0 +266.8652 9805546.0 +312.7841 10051801.0 +316.7777 10734168.0 +372.7383 19374863.0 +382.8218 12815572.0 +392.7685 10913351.0 +434.7287 9943329.0 +440.7322 10603010.0 +488.6825 12267966.0 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 \ No newline at end of file |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/remove_spectra/require_formula.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_spectra/require_formula.msp Thu May 30 18:08:27 2024 +0000 |
[ |
@@ -0,0 +1,83 @@ +FORMULA: C13H14O +CASNO: 2000130-22-2 +ID: 7198 +COMMENT: SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588| +COMPOUND_NAME: ((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol +PARENT_MASS: 186.1044655 +RETENTION_INDEX: 1588.0 +PUBCHEMID: 130762197 +NOMINAL_MASS: 186 +INCHI: nan +NUM PEAKS: 20 +51.0 89.92 +63.0 89.92 +77.0 179.84 +88.0 39.96 +89.0 59.95 +91.0 49.95 +102.0 149.86 +113.0 49.95 +115.0 229.79 +127.0 139.87 +128.0 999.0 +129.0 199.82 +144.0 99.91 +155.0 119.89 +156.0 14.89 +157.0 1.1 +158.0 0.1 +186.0 39.96 +187.0 5.89 +188.0 0.5 + +FORMULA: C34H54O4 +CASNO: 2000774-54-3 +ID: 36905 +COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| +COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate +PARENT_MASS: 526.40221 +RETENTION_INDEX: 3353.0 +PUBCHEMID: 236415 +NOMINAL_MASS: 526 +INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +NUM PEAKS: 14 +189.0 419.62 +203.0 249.77 +216.0 149.86 +262.0 79.93 +276.0 49.95 +393.0 149.86 +423.0 219.8 +453.0 179.84 +466.0 999.0 +526.0 179.84 +527.0 68.94 +528.0 14.29 +529.0 2.1 +530.0 0.2 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/remove_spectra/require_inchi.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_spectra/require_inchi.msp Thu May 30 18:08:27 2024 +0000 |
[ |
@@ -0,0 +1,27 @@ +FORMULA: C34H54O4 +CASNO: 2000774-54-3 +ID: 36905 +COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| +COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate +PARENT_MASS: 526.40221 +RETENTION_INDEX: 3353.0 +PUBCHEMID: 236415 +NOMINAL_MASS: 526 +INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +NUM PEAKS: 14 +189.0 419.62 +203.0 249.77 +216.0 149.86 +262.0 79.93 +276.0 49.95 +393.0 149.86 +423.0 219.8 +453.0 179.84 +466.0 999.0 +526.0 179.84 +527.0 68.94 +528.0 14.29 +529.0 2.1 +530.0 0.2 + |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/remove_spectra/require_inchi_removed_spectra.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_spectra/require_inchi_removed_spectra.msp Thu May 30 18:08:27 2024 +0000 |
[ |
@@ -0,0 +1,91 @@ +CASNO: 2000570-99-8 +ID: 2011 +COMMENT: SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not real! +PARENT_MASS: 347.930801 +PUBCHEMID: 10970124 +NOMINAL_MASS: 348 +SMILES: nan +NUM PEAKS: 3 +292.0 999.0 +314.0 118.89 +348.0 734.24 + +FORMULA: C13H14O +CASNO: 2000130-22-2 +ID: 7198 +COMMENT: SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588| +COMPOUND_NAME: ((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol +PARENT_MASS: 186.1044655 +RETENTION_INDEX: 1588.0 +PUBCHEMID: 130762197 +NOMINAL_MASS: 186 +INCHI: nan +NUM PEAKS: 20 +51.0 89.92 +63.0 89.92 +77.0 179.84 +88.0 39.96 +89.0 59.95 +91.0 49.95 +102.0 149.86 +113.0 49.95 +115.0 229.79 +127.0 139.87 +128.0 999.0 +129.0 199.82 +144.0 99.91 +155.0 119.89 +156.0 14.89 +157.0 1.1 +158.0 0.1 +186.0 39.96 +187.0 5.89 +188.0 0.5 + +IONMODE: negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C001 +RETENTION_TIME: 38.74 +RETENTION_INDEX: None +NUM PEAKS: 16 +138.9121 10186226.0 +175.0641 26780143.0 +196.8658 21390430.0 +198.8647 21688594.0 +206.9034 26130980.0 +254.8252 23747536.0 +258.8237 15532799.0 +266.8652 9805546.0 +312.7841 10051801.0 +316.7777 10734168.0 +372.7383 19374863.0 +382.8218 12815572.0 +392.7685 10913351.0 +434.7287 9943329.0 +440.7322 10603010.0 +488.6825 12267966.0 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/remove_spectra/require_inchikey.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_spectra/require_inchikey.msp Thu May 30 18:08:27 2024 +0000 |
[ |
@@ -0,0 +1,24 @@ +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/remove_spectra/require_precursor_mz.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_spectra/require_precursor_mz.msp Thu May 30 18:08:27 2024 +0000 |
[ |
@@ -0,0 +1,24 @@ +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/remove_spectra/require_retention_index.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_spectra/require_retention_index.msp Thu May 30 18:08:27 2024 +0000 |
[ |
@@ -0,0 +1,83 @@ +FORMULA: C13H14O +CASNO: 2000130-22-2 +ID: 7198 +COMMENT: SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588| +COMPOUND_NAME: ((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol +PARENT_MASS: 186.1044655 +RETENTION_INDEX: 1588.0 +PUBCHEMID: 130762197 +NOMINAL_MASS: 186 +INCHI: nan +NUM PEAKS: 20 +51.0 89.92 +63.0 89.92 +77.0 179.84 +88.0 39.96 +89.0 59.95 +91.0 49.95 +102.0 149.86 +113.0 49.95 +115.0 229.79 +127.0 139.87 +128.0 999.0 +129.0 199.82 +144.0 99.91 +155.0 119.89 +156.0 14.89 +157.0 1.1 +158.0 0.1 +186.0 39.96 +187.0 5.89 +188.0 0.5 + +FORMULA: C34H54O4 +CASNO: 2000774-54-3 +ID: 36905 +COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| +COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate +PARENT_MASS: 526.40221 +RETENTION_INDEX: 3353.0 +PUBCHEMID: 236415 +NOMINAL_MASS: 526 +INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +NUM PEAKS: 14 +189.0 419.62 +203.0 249.77 +216.0 149.86 +262.0 79.93 +276.0 49.95 +393.0 149.86 +423.0 219.8 +453.0 179.84 +466.0 999.0 +526.0 179.84 +527.0 68.94 +528.0 14.29 +529.0 2.1 +530.0 0.2 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/remove_spectra/require_retention_time.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_spectra/require_retention_time.msp Thu May 30 18:08:27 2024 +0000 |
b |
@@ -0,0 +1,23 @@ +IONMODE: negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C001 +RETENTION_TIME: 38.74 +RETENTION_INDEX: None +NUM PEAKS: 16 +138.9121 10186226.0 +175.0641 26780143.0 +196.8658 21390430.0 +198.8647 21688594.0 +206.9034 26130980.0 +254.8252 23747536.0 +258.8237 15532799.0 +266.8652 9805546.0 +312.7841 10051801.0 +316.7777 10734168.0 +372.7383 19374863.0 +382.8218 12815572.0 +392.7685 10913351.0 +434.7287 9943329.0 +440.7322 10603010.0 +488.6825 12267966.0 + |
b |
diff -r c5076cc15e8c -r 86d265d2a334 test-data/remove_spectra/require_smiles.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_spectra/require_smiles.msp Thu May 30 18:08:27 2024 +0000 |
[ |
@@ -0,0 +1,51 @@ +FORMULA: C34H54O4 +CASNO: 2000774-54-3 +ID: 36905 +COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| +COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate +PARENT_MASS: 526.40221 +RETENTION_INDEX: 3353.0 +PUBCHEMID: 236415 +NOMINAL_MASS: 526 +INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +NUM PEAKS: 14 +189.0 419.62 +203.0 249.77 +216.0 149.86 +262.0 79.93 +276.0 49.95 +393.0 149.86 +423.0 219.8 +453.0 179.84 +466.0 999.0 +526.0 179.84 +527.0 68.94 +528.0 14.29 +529.0 2.1 +530.0 0.2 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + |