Repository 'ms2deepscore_training'
hg clone https://toolshed.g2.bx.psu.edu/repos/recetox/ms2deepscore_training

Changeset 0:0a0529822d91 (2024-09-02)
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ms2deepscore commit 4bd610e0cbbcbed51a6bfb880179777fc8034fd6
added:
macros.xml
ms2deepscore_training.xml
test-data/Model_Parameter_JSON.json
test-data/Trained_model.onnx
test-data/clean_spectra.mgf
test-data/inp_filtered_library.msp
test-data/inp_filtered_spectra.msp
test-data/msp_json_score_out.json
test-data/ri_match_60.json
test-data/usescore_json_score_out.json
b
diff -r 000000000000 -r 0a0529822d91 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Sep 02 12:12:30 2024 +0000
[
b'@@ -0,0 +1,150 @@\n+<macros>\n+    <token name="@TOOL_VERSION@">2.0.0</token>\n+    <token name="@ONNX_VERSION@">1.16.2</token>\n+\n+    <xml name="creator">\n+        <creator>\n+            <person\n+                givenName="Zargham"\n+                familyName="Ahmad"\n+                url="https://github.com/zargham-ahmad"\n+                identifier="0000-0002-6096-224X" />\n+            <organization\n+                url="https://www.recetox.muni.cz/"\n+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"\n+                name="RECETOX MUNI" />\n+        </creator>\n+    </xml>\n+\n+    <xml name="edam">\n+        <xrefs>\n+            <xref type="bio.tools">ms2deepscore</xref>\n+        </xrefs>\n+    </xml>\n+\n+    <xml name="input_param">\n+        <conditional name="scores">\n+            <param name="use_scores" label="Use Scores Object" type="select">\n+                <option value="False" selected="true">FALSE</option>\n+                <option value="True">TRUE</option>\n+            </param>\n+            <when value="True">\n+                <param label="Scores object" name="scores_in" type="data" format="json"\n+                    help="Scores objects calculated previously using one of the matchms similarity tools." 
/>\n+            </when>\n+            <when value="False">\n+                <param label="Queries spectra" name="queries" type="data" format="msp"\n+                    help="Query mass spectra to match against references."/>\n+                <param label="Reference spectra" name="references" type="data" format="msp"\n+                    help="Reference mass spectra to match against as library."/>\n+            </when>\n+        </conditional>\n+        <param name="model" type="data" format="onnx" label="Model" \n+            help="Select the trained MS2DeepScore model file (onnx format) in the ONNX format as created by the \'MS2DeepScore Training\' tool."/>\n+        <param name="model_param" type="data" format="json" label="Configuration"\n+            help="Select the MS2DeepScore model configurations  in JSON format. Can be created using the \'MS2DeepScore Config Generator\' tool."/>\n+    </xml>\n+\n+    <xml name="training_param">\n+        <param label="Training Dataset" name="spectra" type="data" format="msp,mgf"\n+            help="Spectra file that should be used for training. (it will be split in training, validation and test sets)."/>\n+        <param name="model_param" type="data" format="json" label="Model Settings" help="json file with the MS2Deepscore model settings."/>\n+        <param name="validation_split_fraction" type="integer" min="0" max="100" value="20" label="Validation split fraction [%]" \n+            help="The fraction of the inchikeys that will be used for validation and test"/>\n+    </xml>\n+\n+    <xml name="config_generator">\n+        <section name="model_structure" title="Model Structure" expanded="true">\n+            <repeat name="layers" title="Layer" min="1" default="1" >\n+                <param name="dims" type="integer" label="Dimensions" min = "0" value="2000" help="Size of the in-between layer to add." 
/>\n+            </repeat>\n+            <param name="embedding_dim" type="integer" label="Embedding Dimension" value="400" help="The dimension of the final embedding layer." />\n+            <param name="ionisation_mode" type="select" label="Ionisation Mode">\n+                <option value="positive" selected="true">Positive</option>\n+                <option value="negative">Negative</option>\n+                <option value="both">Both</option>\n+            </param>\n+        </section>\n+        \n+        <section name="tensorization_settings" title="Tensorization Settings" expanded="true">\n+            <param name="min_mz" type="integer" label="Min m/z" value="10" />\n+            <param name="max_mz" type="integer" label="Max m/z" value="1000" />\n+            <param name="mz_bin_width" type="float" label="m/z Bin Width" value="0.1" />\n+            <param name="intensity_scaling" type'..b'tion>\n+                \n+\n+        <section name="training_settings" title="Training Settings" expanded="false">\n+            <param name="dropout_rate" type="float" label="Dropout Rate" value="0.0" />\n+            <param name="learning_rate" type="float" label="Learning Rate" value="0.00025" />\n+            <param name="epochs" type="integer" label="Epochs" value="250" />\n+            <param name="patience" type="integer" label="Patience" value="20" help="How long the model should keep training if validation does not improve" />\n+            <param name="loss_function" type="select" label="Loss Function">\n+                <option value="mse" selected="true">Mean Squared Error (mse)</option>\n+                <option value="mae">Mean Absolute Error (mae)</option>\n+                <option value="rmse">Root Mean Squared Error (rmse)</option>\n+                <option value="risk_mae">Risk Aware MAE (risk_aware_mae)</option>\n+                <option value="risk_mse">Risk Aware MSE (risk_aware_mse)</option>\n+            </param>\n+            <param 
name="weighting_factor" type="integer" label="Weighting Factor" value="0" />\n+            <param name="batch_size" type="integer" value="32" label="Batch Size" help="Number of pairs per batch" />\n+            <param name="average_pairs_per_bin" type="integer" value="20" label="Average pairs per bin" help="The aimed average number of pairs of spectra per spectrum in each bin." />\n+            <param name="random_seed" type="text" label="Random seed" value="None" help="Specify random seed for reproducible random number generation." />\n+        </section>\n+    </xml>\n+\n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">https://doi.org/10.1186/s13321-021-00558-4</citation>\n+            <citation type="doi">https://doi.org/10.1101/2024.03.25.586580</citation>\n+        </citations>\n+    </xml>\n+\n+\n+<token name="@HELP@">\n+    ms2deepscore provides a Siamese neural network that is trained to predict molecular structural similarities (Tanimoto scores) from pairs of mass spectrometry spectra.\n+    The library provides an intuitive classes to prepare data, train a siamese model, and compute similarities between pairs of spectra.\n+    In addition to the prediction of a structural similarity, MS2DeepScore can also make use of Monte-Carlo dropout to assess the model uncertainty.\n+    MS2DeepScore is able to identify highly-reliable structural matches and to predict Tanimoto scores for pairs of molecules based on their fragment spectra with a root mean squared error of about 0.15.\n+    Furthermore, the prediction uncertainty estimate can be used to select a subset of predictions with a root mean squared error of about 0.1.\n+    MS2DeepScore can also be used to create chemically meaningful mass spectral embeddings that could be used to cluster large numbers of spectra.\n+</token>\n+\n+\n+<token name="@init_scores@">\n+from matchms.importing import load_from_msp, scores_from_json\n+from matchms import Scores\n+#if 
$scores.use_scores == "True"\n+scores = scores_from_json("${scores_in}")\n+#else\n+scores = Scores(references=list(load_from_msp("$references")), queries=list(load_from_msp("$queries")), is_symmetric=False)\n+#end if\n+</token>\n+        \n+<token name="@init_logger@">\n+from matchms import set_matchms_logger_level\n+set_matchms_logger_level("WARNING")\n+</token>\n+\n+<token name="@json_load@">\n+import numpy as np\n+import json\n+\n+with open("$model_param", \'r\') as json_file:\n+    model_params = json.load(json_file)\n+\n+# Conditionally convert specific keys if they are present\n+if \'base_dims\' in model_params:\n+    model_params[\'base_dims\'] = tuple(model_params[\'base_dims\'])\n+\n+if \'same_prob_bins\' in model_params:\n+    model_params[\'same_prob_bins\'] = np.array(model_params[\'same_prob_bins\'])\n+\n+if \'additional_metadata\' in model_params:\n+    model_params[\'additional_metadata\'] = [\n+        (entry[0], entry[1]) for entry in model_params[\'additional_metadata\']\n+    ]\n+</token>\n+</macros>\n'
b
diff -r 000000000000 -r 0a0529822d91 ms2deepscore_training.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ms2deepscore_training.xml Mon Sep 02 12:12:30 2024 +0000
[
@@ -0,0 +1,91 @@
+<tool id="ms2deepscore_training" name="MS2DeepScore Model Training" version="@TOOL_VERSION@+galaxy0">
+    <description>Train an MS2DeepScore model and export it to ONNX</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="creator"/>
+    <expand macro="edam" />
+
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">ms2deepscore</requirement>
+        <requirement type="package" version="@ONNX_VERSION@">onnx</requirement>
+    </requirements>
+    
+    <command detect_errors="exit_code"><![CDATA[
+        ## Stage the input under a name that carries its datatype extension; steps are chained so any failure fails the job.
+        mkdir -p processing && cp '$spectra' 'processing/input.${spectra.ext}' &&
+        python3 '${python_wrapper}'
+    ]]></command>
+<configfiles>
+<configfile name="python_wrapper">
+import onnx
+import os
+import torch
+from ms2deepscore.models import load_model
+from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore
+from ms2deepscore.wrapper_functions.training_wrapper_functions import train_ms2deepscore_wrapper, StoreTrainingData
+
+@json_load@
+
+# Train on the staged copy of the dataset; the value returned by the wrapper is used below as a path component.
+spectra_path = "processing/input.$spectra.ext"
+model_settings = SettingsMS2Deepscore(**model_params)
+run_dir = train_ms2deepscore_wrapper(spectra_path, model_settings, $validation_split_fraction)
+
+# Build the path of the saved PyTorch model from the trained-models folder, the run directory and the configured file name.
+models_root = StoreTrainingData(spectra_path).trained_models_folder
+trained_model = load_model(os.path.join(models_root, run_dir, model_settings.model_file_name))
+trained_model.eval()
+
+# Shapes of the placeholder inputs: a single row, with widths read from the settings.
+n_rows = 1
+spectrum_shape = (n_rows, model_settings.number_of_bins())
+metadata_shape = (n_rows, len(model_settings.additional_metadata))
+
+# Random placeholder inputs handed to the exporter (two spectra tensors, then two metadata tensors).
+example_inputs = (
+    torch.randn(*spectrum_shape),
+    torch.randn(*spectrum_shape),
+    torch.randn(*metadata_shape),
+    torch.randn(*metadata_shape),
+)
+
+# Export the trained model in ONNX format to the tool output file.
+torch.onnx.export(
+    trained_model, example_inputs, "$onnx_trained_model", verbose=True
+)
+
+</configfile>
+</configfiles>
+    
+    <inputs>
+        <expand macro="training_param" />
+    </inputs>
+    
+    <outputs>
+        <data label="Trained model" name="onnx_trained_model" format="onnx"/>
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="spectra" value="clean_spectra.mgf" ftype="mgf"/>
+            <param name="model_param" value="Model_Parameter_JSON.json" ftype="json" />
+            <param name="validation_split_fraction" value="5"/>
+            <output name="onnx_trained_model" value="Trained_model.onnx" ftype="onnx" compare="sim_size"/>
+        </test>
+    </tests>
+
+    <help>
+<![CDATA[
+Info
+====
+This tool trains an MS2DeepScore model using the provided training data and model configuration.
+The trained model is then exported using ONNX.
+
+About
+=====
+@HELP@
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 0a0529822d91 test-data/Model_Parameter_JSON.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Model_Parameter_JSON.json Mon Sep 02 12:12:30 2024 +0000
[
@@ -0,0 +1,50 @@
+{
+    "base_dims": [
+        20,
+        20
+    ],
+    "embedding_dim": 15,
+    "ionisation_mode": "negative",
+    "train_binning_layer": false,
+    "train_binning_layer_group_size": 20,
+    "train_binning_layer_output_per_group": 2,
+    "dropout_rate": 0.0,
+    "learning_rate": 0.00025,
+    "epochs": 2,
+    "patience": 20,
+    "loss_function": "mse",
+    "weighting_factor": 0,
+    "model_file_name": "ms2deepscore_model.pt",
+    "history_plot_file_name": "history.svg",
+    "time_stamp": "2024_08_16_07_50_22",
+    "min_mz": 10,
+    "max_mz": 1000,
+    "mz_bin_width": 0.1,
+    "intensity_scaling": 0.5,
+    "additional_metadata": [],
+    "batch_size": 2,
+    "num_turns": 1,
+    "shuffle": true,
+    "use_fixed_set": false,
+    "average_pairs_per_bin": 2,
+    "max_pairs_per_bin": 100,
+    "same_prob_bins": [
+        [
+            0.0,
+            0.2
+        ],
+        [
+            0.2,
+            1.0
+        ]
+    ],
+    "include_diagonal": true,
+    "random_seed": 42,
+    "fingerprint_type": "daylight",
+    "fingerprint_nbits": 2048,
+    "augment_removal_max": 0.2,
+    "augment_removal_intensity": 0.2,
+    "augment_intensity": 0.2,
+    "augment_noise_max": 10,
+    "augment_noise_intensity": 0.02
+}
\ No newline at end of file
b
diff -r 000000000000 -r 0a0529822d91 test-data/Trained_model.onnx
b
Binary file test-data/Trained_model.onnx has changed
b
diff -r 000000000000 -r 0a0529822d91 test-data/clean_spectra.mgf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/clean_spectra.mgf Mon Sep 02 12:12:30 2024 +0000
b
b'@@ -0,0 +1,6545 @@\n+BEGIN IONS\n+CHARGE=1-\n+IONMODE=positive\n+SMILES=C1=CC=C2C(=C1)NC(=N2)C3=CC=CO3\n+INCHI=InChI=1S/C11H8N2O/c1-2-5-9-8(4-1)12-11(13-9)10-6-3-7-14-10/h1-7H,(H,12,13)\n+PUBMED=n/a\n+SCANS=675\n+COMPOUND_NAME=Pesticide6_Fuberidazole_C11H8N2O_2-(2-Furyl)-1H-benzimidazole\n+ADDUCT=M-H\n+PRECURSOR_MZ=183.057\n+PARENT_MASS=184.06427645199074\n+INCHIKEY=UYJUZNLFJAWNEZ-UHFFFAOYSA-N\n+MS_LEVEL=2\n+INSTRUMENT_TYPE=-Q-Exactive Plus Orbitrap Res 70k\n+FILE_NAME=Pesticide_Mix6_neg.mzXML\n+PEPTIDE_SEQUENCE=*..*\n+ORGANISM_NAME=GNPS-COLLECTIONS-PESTICIDES-NEGATIVE\n+PRINCIPAL_INVESTIGATOR=Dorrestein/Touboul\n+DATA_COLLECTOR=lfnothias\n+SUBMIT_USER=mwang87\n+CONFIDENCE=1\n+SPECTRUM_ID=CCMSLIB00001058235\n+70.786774 0.0007471592824649794 \n+72.976173 0.000845691292574321 \n+73.493057 0.0007356799253582395 \n+73.515923 0.000741301572692973 \n+74.17305 0.0007321002583299212 \n+75.004799 0.00109654132590122 \n+77.80809 0.0007347543045695998 \n+82.050003 0.000756525515244351 \n+82.620819 0.0009149395002561002 \n+82.860512 0.0007769996417003124 \n+88.244919 0.0007313444143177342 \n+89.454147 0.0008042889036619802 \n+94.881096 0.0007669434273430975 \n+94.917038 0.03835127532434429 \n+94.918587 0.0016159622558933084 \n+94.993851 0.0011285128508041412 \n+102.743492 0.0008298266830739796 \n+106.645241 0.0008737587071305573 \n+106.876747 0.0009416995332485276 \n+109.907501 0.0009416649476005834 \n+112.186974 0.000831069804168125 \n+112.985817 0.0073472682003234095 \n+113.004906 0.000827142052600318 \n+115.000427 0.004072169320321451 \n+115.333664 0.0009369156539800012 \n+116.995949 0.020538189932937403 \n+117.046104 0.006795963912947386 \n+117.763412 0.0007971754214721417 \n+122.933907 0.0009391669600031306 \n+125.587662 0.0009065026763172214 \n+126.956421 0.0011171812766880254 \n+133.029556 0.0015108471153377407 \n+133.420456 0.0008102810560462471 \n+133.435806 0.0009753709315551408 \n+136.894592 0.0014461903832713615 \n+138.90712 0.01529902536976025 \n+138.965378 
0.0011543174307879335 \n+141.046158 0.001277732835226084 \n+142.99292 0.0029939467228695103 \n+142.995346 0.003604481587134265 \n+146.785355 0.0009544289019961817 \n+154.053802 0.012885651738387452 \n+154.951416 0.001552681758392334 \n+154.992584 0.003692910520172884 \n+155.061676 0.5147309484142452 \n+163.001526 0.008337771489619474 \n+170.366013 0.0009908618355743276 \n+182.987579 0.005978210611573189 \n+183.007996 0.01814432541918768 \n+183.056702 1.0 \n+183.121933 0.001568256562688345 \n+190.211334 0.0008562716641785351 \n+202.308212 0.001147175501482932 \n+END IONS\n+\n+BEGIN IONS\n+CHARGE=1-\n+IONMODE=positive\n+SMILES=CCCCC1=C(NC(=NC1=O)NCC)C\n+INCHI=InChI=1S/C11H19N3O/c1-4-6-7-9-8(3)13-11(12-5-2)14-10(9)15/h4-7H2,1-3H3,(H2,12,13,14,15)\n+PUBMED=n/a\n+SCANS=799\n+COMPOUND_NAME=Pesticide6_Ethirimol_C11H19N3O_882476\n+ADDUCT=M-H\n+PRECURSOR_MZ=208.146\n+PARENT_MASS=209.15327645199073\n+INCHIKEY=BBXXLROWFHWFQY-UHFFFAOYSA-N\n+MS_LEVEL=2\n+INSTRUMENT_TYPE=-Q-Exactive Plus Orbitrap Res 70k\n+FILE_NAME=Pesticide_Mix6_neg.mzXML\n+PEPTIDE_SEQUENCE=*..*\n+ORGANISM_NAME=GNPS-COLLECTIONS-PESTICIDES-NEGATIVE\n+PRINCIPAL_INVESTIGATOR=Dorrestein/Touboul\n+DATA_COLLECTOR=lfnothias\n+SUBMIT_USER=mwang87\n+CONFIDENCE=1\n+SPECTRUM_ID=CCMSLIB00001058238\n+71.903069 0.0004648291231942036 \n+71.947182 0.0004283445243219117 \n+74.565865 0.000469916086627453 \n+75.419785 0.0006587251024457776 \n+75.420715 0.0008395344591522679 \n+78.32296 0.0004274646599938443 \n+79.322105 0.00045821705236950193 \n+81.058884 0.0004781716585102364 \n+81.059921 0.0005731111499287832 \n+81.116722 0.00043096872479086355 \n+83.45752 0.0004861158306699209 \n+84.196243 0.0004363585621093188 \n+84.528267 0.00046703468299359036 \n+86.351273 0.0004555336781321055 \n+92.452217 0.0004517979876012696 \n+95.990242 0.001087118108724047 \n+106.570053 0.0006266439268792406 \n+106.702705 0.0005019467524756924 \n+114.066223 0.0004453210855411035 \n+116.140869 0.0007225658228809554 \n+116.142654 0.000799180982675793 
\n+117.28965 0.0004466653534595229 \n+120.383957 0.0005353746845716758 \n+123.893204 0.0004501144382596247 \n+124.408546 0.000436684'..b'307446694 \n+214.028976 0.14237526889990515 \n+227.798203 0.0003944039468329826 \n+232.039551 0.005540042342313366 \n+234.03479 0.038048249907356266 \n+236.149445 0.0007123288311506702 \n+237.868103 0.0022370953537359313 \n+253.930969 0.0022340086857174824 \n+254.041382 1.0 \n+254.148926 0.0022549753402181035 \n+254.154694 0.0005821133511861894 \n+258.018066 0.0015949558980382834 \n+271.799957 0.00037950039989465413 \n+271.806305 0.0003839479321218562 \n+271.92218 0.17284037542272346 \n+274.047394 0.08925392075289643 \n+274.055969 0.0006126399906773597 \n+306.421631 0.001604061131535462 \n+315.286621 0.0003830297794030247 \n+405.962219 0.004073367430890103 \n+436.32077 0.00038301025108200895 \n+447.176361 0.00035167536851231633 \n+484.332703 0.0003584478933862859 \n+607.278015 0.0003944713629467001 \n+610.201599 0.00035151647865231913 \n+632.82312 0.0004322436240831766 \n+681.013611 0.0011754550213838061 \n+END IONS\n+\n+BEGIN IONS\n+CHARGE=1-\n+IONMODE=negative\n+SMILES=C1CC1(C(CC2=CC=CC=C2Cl)(CN3C(=S)N=CN3)O)Cl\n+INCHI=InChI=1S/C14H15Cl2N3OS/c15-11-4-2-1-3-10(11)7-14(20,13(16)5-6-13)8-19-12(21)17-9-18-19/h1-4,9,20H,5-8H2,(H,17,18,21)\n+PUBMED=n/a\n+SCANS=1819\n+COMPOUND_NAME=Pesticide5_Prothioconazole_C14H15Cl2N3OS_Redigo\n+ADDUCT=M-H\n+PRECURSOR_MZ=342.024\n+PARENT_MASS=343.0312764519908\n+INCHIKEY=MNHVNIJQQRJYDH-UHFFFAOYSA-N\n+MS_LEVEL=2\n+INSTRUMENT_TYPE=-Q-Exactive Plus Orbitrap Res 70k\n+FILE_NAME=Pesticide_Mix5_neg.mzXML\n+PEPTIDE_SEQUENCE=*..*\n+ORGANISM_NAME=GNPS-COLLECTIONS-PESTICIDES-NEGATIVE\n+PRINCIPAL_INVESTIGATOR=Dorrestein/Touboul\n+DATA_COLLECTOR=lfnothias\n+SUBMIT_USER=mwang87\n+CONFIDENCE=1\n+SPECTRUM_ID=CCMSLIB00001058471\n+71.990753 0.015116774544023995 \n+72.986069 0.0016825795325651884 \n+74.670418 0.0013362726296112454 \n+75.724503 0.0013244119027955264 \n+86.642189 0.0012476389108325326 \n+87.146202 
0.001596097640330774 \n+91.927261 0.001425194097605796 \n+92.643082 0.0013633691011437684 \n+98.989372 0.11760745931331651 \n+99.997299 0.7075052541531572 \n+100.024094 0.0021253587890629126 \n+102.928947 0.0015047900211222238 \n+105.755836 0.001316939194187164 \n+108.527939 0.0014748761874386134 \n+109.666077 0.0013635788794660302 \n+110.036003 0.011329920389071362 \n+111.127708 0.0014857822888041652 \n+111.997559 0.0023746833316083517 \n+114.013115 0.029289035849694257 \n+118.041168 0.0019808910294971853 \n+119.024933 0.002724093592573426 \n+120.056686 0.006126454793160624 \n+122.036072 0.053981011120850804 \n+123.997589 0.0021668666401934798 \n+125.006592 0.015438088121917475 \n+125.016548 0.3418557721756947 \n+134.036148 0.0023033735491672564 \n+146.036026 0.012161220158205456 \n+146.038376 0.001696504729899564 \n+147.043732 0.026770572055826038 \n+147.086044 0.0013877915691700021 \n+148.051849 0.002256452715578696 \n+149.925461 0.0013352702523049627 \n+152.029037 0.005232454197725137 \n+152.185623 0.0014520591321934373 \n+153.000473 0.00250826881430397 \n+153.013046 0.007134054173223357 \n+156.024017 0.008584287903830515 \n+158.988098 0.0015487344570004331 \n+159.949387 0.0012974185672795632 \n+162.18541 0.0015724386869369607 \n+163.515976 0.0014444521406317657 \n+178.00882 0.001686840487395313 \n+178.910522 0.001358278108927308 \n+179.426331 0.0012509074923695968 \n+180.023941 0.21021807375880533 \n+181.032272 0.0014581759135889826 \n+182.851898 0.001482852714733807 \n+183.004364 0.03742135932070929 \n+183.150711 0.0014350865796783085 \n+184.18306 0.001361525857087034 \n+216.00032 0.07036976432878854 \n+218.748413 0.001442200375447527 \n+219.033325 0.008722682199034715 \n+220.053864 0.002119137634099718 \n+222.825455 0.0013479468869010529 \n+247.06488 0.009214680671581104 \n+250.021469 0.0032453350001584165 \n+252.788223 0.0013368653514209978 \n+254.84024 0.0014569414805622736 \n+264.036957 0.11840294866243518 \n+266.016724 0.005148004906903799 \n+270.069885 
0.0018437036300653733 \n+274.075409 0.003386558930687423 \n+278.016479 0.025966963851954333 \n+306.048065 0.1885567316527084 \n+335.996277 0.0016951210591739099 \n+340.618744 0.0016478285239790046 \n+342.024384 1.0 \n+342.192444 0.001795215337797163 \n+END IONS\n+\n'
b
diff -r 000000000000 -r 0a0529822d91 test-data/inp_filtered_library.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inp_filtered_library.msp Mon Sep 02 12:12:30 2024 +0000
[
b'@@ -0,0 +1,4205 @@\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C20H12\n+INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N\n+SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Perylene\n+RETENTION_TIME: None\n+RETENTION_INDEX: 2886.9\n+PRECURSOR_MZ: 252.09323\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 251.08595400000002\n+NUM PEAKS: 3\n+250.07765   0.3282529462971431\n+252.09323   1.0         "Theoretical m/z 252.093354, Mass diff 0 (0.49 ppm), SMILES C1=CC=2C=CC=C3C4=CC=CC5=CC=CC(C(=C1)C23)=C54, Annotation [C20H12]+, Rule of HR False"\n+253.09656   0.20573802940517583\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C14H10\n+INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N\n+SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Phenanthrene\n+RETENTION_TIME: None\n+RETENTION_INDEX: 1832.9\n+PRECURSOR_MZ: 178.0775\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 177.070224\n+NUM PEAKS: 5\n+152.0619    0.1657993569424221\n+176.062     0.24558560966311757     "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8"\n+177.06982   0.12764433529926775     "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9"\n+178.0775    1.0         "Theoretical m/z 178.077698, Mass diff 0 (1.11 ppm), SMILES C=1C=CC2=C(C1)C=CC=3C=CC=CC32, Annotation [C14H10]+, Rule of HR False"\n+179.08078   0.16394988149600653\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C14H10\n+INCHIKEY: MWPLVEDNUUSJAV-UHFFFAOYSA-N\n+SMILES: C1=CC2=CC3=C(C=CC=C3)C=C2C=C1\n+AUTHORS: Price et al., RECETOX, Masaryk University 
(CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Anthracene\n+RETENTION_TIME: None\n+RETENTION_INDEX: 1844.4\n+PRECURSOR_MZ: 178.07754\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 177.070264\n+NUM PEAKS: 5\n+152.06195   0.12450313104470498\n+176.06204   0.23295403420236208     "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8"\n+177.06984   0.1074344883724439      "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9"\n+178.07754   1.0         "Theoretical m/z 178.077698, Mass diff 0 (0.89 ppm), SMILES C=1C=CC=2C=C3C=CC=CC3=CC2C1, Annotation [C14H10]+, Rule of HR False"\n+179.08081   0.1616741186784917\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C12H10\n+INCHIKEY: CWRYPZZKDGJXCA-UHFFFAOYSA-N\n+SMILES: C1CC2=C3C1=CC=CC3=CC=C2\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Acenaphthene\n+RETENTION_TIME: None\n+RETENTION_INDEX: 1528.3\n+PRECURSOR_MZ: 154.07741\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 153.070134\n+NUM PEAKS: 4\n+151.05418   0.10238389021994407     "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7"\n+152.06194   0.4817565861859871\n+153.06969   1.0         "Theoretical m/z 153.070425, Mass diff 0 (0 ppm), Formula C12H9"\n+154.07741   0.6474388804646675      "Theoretical m/z 154.077698, Mass diff 0 (1.87 ppm), SMILES C=1C=C2C=CC=C3C2=C(C1)CC3, Annotation [C12H10]+, Rule of HR False"\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C16H10\n+INCHIKEY: GVEPBJHOBDJJJI-UHFFFAOYSA-N\n+SMILES: C1=CC2=C(C=C1)C1=C3C2=CC=CC3=CC=C1\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: 
Fluoranthene\n+RETENTION_TIME: None\n+RETENTION_INDEX: 2102.7\n+PRECURSOR_MZ: 202.07756\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n'..b'  "Theoretical m/z 235.066148, Mass diff 0 (0.18 ppm), SMILES O=C(NC1=CC=CC=C1)C2=C(OCCS2)C, Annotation [C12H13NO2S]+, Rule of HR False"\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C13H18O5S\n+INCHIKEY: IRCMYGHHKLLGHV-UHFFFAOYSA-N\n+SMILES: CCOC1C(C2=C(O1)C=CC(=C2)OS(=O)(=O)C)(C)C\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Ethofumesate\n+RETENTION_TIME: None\n+RETENTION_INDEX: 1954.4\n+PRECURSOR_MZ: 286.08679\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 285.079514\n+NUM PEAKS: 13\n+79.05419    0.1392834489952906      "Theoretical m/z 79.054228, Mass diff 0 (0.48 ppm), SMILES C1=CC=CC=C1, Annotation [C6H6+H]+, Rule of HR True"\n+91.0542     0.1456948233889637      "Theoretical m/z 91.054775, Mass diff 0 (0 ppm), Formula C7H7"\n+105.06984   0.29471655075385655     "Theoretical m/z 105.070425, Mass diff 0 (0 ppm), Formula C8H9"\n+115.05416   0.14265948876449097     "Theoretical m/z 115.054229, Mass diff 0 (0.6 ppm), SMILES C1=CC=C(C=C1)C(C)C, Annotation [C9H12-5H]+, Rule of HR True"\n+133.0647    0.3868381358475808      "Theoretical m/z 133.064798, Mass diff 0 (0.73 ppm), SMILES OC=1C=CC=C(C=1)C(C)C, Annotation [C9H12O-3H]+, Rule of HR True"\n+137.05962   0.565607729176301       "Theoretical m/z 137.060255, Mass diff 0 (0 ppm), Formula C8H9O2"\n+161.0596    1.0         "Theoretical m/z 161.059701, Mass diff 0 (0.63 ppm), SMILES OC=1C=CC=2OCC(C=2(C=1))(C)C, Annotation [C10H12O2-3H]+, Rule of HR True"\n+162.06293   0.1395183569693118\n+163.07518   0.1571750863529426      "Theoretical m/z 163.075351, Mass diff 0 (1.05 ppm), SMILES OC=1C=CC=2OCC(C=2(C=1))(C)C, Annotation [C10H12O2-H]+, Rule of HR True"\n+179.07016   0.3858738890199595      
"Theoretical m/z 179.07027, Mass diff 0 (0.62 ppm), SMILES OC=1C=CC=2OC(O)C(C=2(C=1))(C)C, Annotation [C10H12O3-H]+, Rule of HR True"\n+207.1015    0.9146502804597079      "Theoretical m/z 207.101566, Mass diff 0 (0.32 ppm), SMILES OC=1C=CC=2OC(OCC)C(C=2(C=1))(C)C, Annotation [C12H16O3-H]+, Rule of HR True"\n+208.1048    0.1224932435932507\n+286.08679   0.294378369815484       "Theoretical m/z 286.086945, Mass diff 0 (0.54 ppm), SMILES O=S(=O)(OC=1C=CC=2OC(OCC)C(C=2(C=1))(C)C)C, Annotation [C13H18O5S]+, Rule of HR False"\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C17H17N3OS\n+INCHIKEY: LMVPQMGRYSRMIW-KRWDZBQOSA-N\n+SMILES: CC1(C(=O)N(C(=N1)SC)NC2=CC=CC=C2)C3=CC=CC=C3\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Fenamidone\n+RETENTION_TIME: None\n+RETENTION_INDEX: 2516.1\n+PRECURSOR_MZ: 311.10815\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 310.10087400000003\n+NUM PEAKS: 10\n+77.03851    0.1341728006141734      "Theoretical m/z 77.038578, Mass diff 0 (0.88 ppm), SMILES C1=CC=CC=C1, Annotation [C6H6-H]+, Rule of HR True"\n+91.04161    0.14196713486951465     "Theoretical m/z 91.041647, Mass diff 0 (0.41 ppm), SMILES NC1=CC=CC=C1, Annotation [C6H7N-2H]+, Rule of HR False"\n+103.05415   0.13231626786608644     "Theoretical m/z 103.054223, Mass diff 0 (0.71 ppm), SMILES C1=CC=C(C=C1)CC, Annotation [C8H10-3H]+, Rule of HR True"\n+206.07457   0.2477103476704148      "Theoretical m/z 206.07464, Mass diff 0 (0.34 ppm), SMILES N(=C(NN)SC)C(C1=CC=CC=C1)C, Annotation [C10H15N3S-3H]+, Rule of HR True"\n+210.11507   0.1254196447843151\n+237.10208   0.5638187350251782      "Theoretical m/z 237.102243, Mass diff 0 (0.69 ppm), SMILES O=C(NNC1=CC=CC=C1)C(C2=CC=CC=C2)C, Annotation [C15H16N2O-3H]+, Rule of HR True"\n+238.10997   0.737876380592742\n+239.11317   
0.12354824276317873\n+268.09      1.0         "Theoretical m/z 268.090295, Mass diff 0 (1.1 ppm), SMILES N(=C(NNC1=CC=CC=C1)S)C(C2=CC=CC=C2)C, Annotation [C15H17N3S-3H]+, Rule of HR True"\n+269.09351   0.15574634382295574\n+\n'
b
diff -r 000000000000 -r 0a0529822d91 test-data/inp_filtered_spectra.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inp_filtered_spectra.msp Mon Sep 02 12:12:30 2024 +0000
b
b'@@ -0,0 +1,1008 @@\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C019\n+RETENTION_TIME: 688.11\n+RETENTION_INDEX: 4135.446429\n+CHARGE: -1\n+NUM PEAKS: 21\n+164.985419247789        0.2949419846671587\n+179.033979756352        0.1975962017168221\n+194.04893073403         0.1080780080055076\n+248.988380501455        0.4192550107623055\n+251.037178293           0.1975188727945594\n+283.099929585291        0.24858700581303622\n+313.053040045895        0.15255184988804857\n+329.031653006854        0.12152318335456756\n+341.157248840923        0.33065016037294653\n+385.022947628725        0.11792404215662944\n+401.053681557414        0.5558197970588639\n+403.051400482668        0.10684249115159443\n+415.106669687654        1.0\n+416.107049345269        0.3343846089983622\n+417.104122333661        0.16072829222839785\n+489.124198650628        0.7880577919119076\n+535.10922525834         0.17951802003040962\n+550.163296442538        0.22193082425956692\n+551.161445828019        0.11285226875001307\n+564.146181690587        0.10968283775320925\n+623.183150220198        0.24882510323783946\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C008\n+RETENTION_TIME: 383.25\n+RETENTION_INDEX: 2436.111111\n+CHARGE: -1\n+NUM PEAKS: 10\n+167.06348032557         0.2012680241749571\n+169.042872715042        0.22527813564183466\n+185.073952424469        0.1146092209329471\n+224.061333736415        0.637856954108077\n+241.084284214072        0.5378362429201462\n+243.086918863664        0.8440889152136807\n+245.102418520421        1.0\n+257.113614660022        0.7014464407039914\n+258.111154671539        0.45403443749116\n+315.119168534318        0.2802677742648726\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C001\n+RETENTION_TIME: 268.99\n+RETENTION_INDEX: 1800.989583\n+CHARGE: -1\n+NUM PEAKS: 7\n+147.065597668017        0.20713460605355152\n+149.044686744287        0.15021454193335101\n+183.047097257536        
0.2756365347787095\n+257.066031671279        0.16716705800250425\n+273.097313808265        1.0\n+274.099806030141        0.1382750943677773\n+347.116266654718        0.10796331331103011\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C010\n+RETENTION_TIME: 271.76\n+RETENTION_INDEX: 1815.416667\n+CHARGE: -1\n+NUM PEAKS: 12\n+76.0323849976885        0.5337096603553363\n+107.129167494293        0.17750725429827036\n+120.05545372843         0.732323548309492\n+132.054968772294        0.6582449478277536\n+136.084083848357        0.18110012219874336\n+138.177598969977        0.10980088538234019\n+311.387345297053        0.3149229368865709\n+312.394070075839        1.0\n+313.396963036504        0.1996701454734163\n+329.397982197985        0.17215789049120694\n+330.403963356557        0.5441264876224288\n+331.407288294656        0.1075489761979763\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C009\n+RETENTION_TIME: 224.1\n+RETENTION_INDEX: 1606.578947\n+CHARGE: -1\n+NUM PEAKS: 5\n+128.088974937905        0.30238672398872746\n+142.104728421893        0.13125344469355718\n+230.102912184687        0.13629903834332716\n+246.13394892703         1.0\n+247.135560589937        0.1027800448922141\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C011\n+RETENTION_TIME: 322.28\n+RETENTION_INDEX: 2083.777778\n+CHARGE: -1\n+NUM PEAKS: 8\n+191.091545005862        0.3661694612326083\n+204.099524919261        0.1379655416395176\n+217.107439740029        0.8659676702439103\n+221.084188869749        0.13224939836684152\n+265.110639707297        0.12404535021125133\n+305.141591931944        1.0\n+306.142075983677        0.19894438269593562\n+318.149353523284        0.5161780338363837\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C013\n+RETENTION_TIME: 471.03\n+RETENTION_INDEX: 3168.684211\n+CHARGE: -1\n+NUM PEAKS: 6\n+78.0464517452347        0.13019193533313522\n+155.087332200892        
0.16981875175936348\n+207.101729466849        1.0\n+313.053036971607        0.12108100658431745\n+353.320009154806        0.2895861096886366\n+624.30876303981         0.14852029190801175\n'..b'349696378594456\n+196.992752093539        0.3065603768824057\n+197.974508055988        0.395078144825856\n+198.95441899853         0.20984023997622794\n+212.970014961884        0.312364140356306\n+213.006147303489        0.3451086913299261\n+214.98560992714         0.5058106319186024\n+220.957069324217        0.1741467403445935\n+222.936453968368        0.1925777417566011\n+229.019119616188        1.0\n+230.00074243328         0.4313373151032761\n+236.98832473252         0.17144823687024496\n+240.946899738586        0.37016492220221137\n+253.982966844011        0.2619026170220304\n+269.977845781938        0.369206087321791\n+270.975426915952        0.26702156040941716\n+270.993770069105        0.5558156567281477\n+297.079403794841        0.3276080388276794\n+298.934550867121        0.27505460413538213\n+328.980928284848        0.30053783536988743\n+330.960641905517        0.36106011190503906\n+372.953096157169        0.24565144306023193\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C018\n+RETENTION_TIME: 687.52\n+RETENTION_INDEX: 4133.690476\n+CHARGE: -1\n+NUM PEAKS: 9\n+209.011554808631        0.6525630494040975\n+209.029113642814        0.10839584784422365\n+210.990970491228        0.15000447517519525\n+226.041539764125        0.3130785916148705\n+227.039590062704        0.17080278851135594\n+265.020169474237        0.2407632085362116\n+281.051364215883        1.0\n+282.049803555434        0.21798058347517868\n+285.009414657485        0.12222513080126347\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C012\n+RETENTION_TIME: 687.83\n+RETENTION_INDEX: 4134.613095\n+CHARGE: -1\n+NUM PEAKS: 27\n+121.046741626167        0.12537265500763864\n+192.997735629267        0.1940732067111916\n+205.016711322548        
0.12056941717717026\n+209.098935410027        0.12559326156418593\n+213.05754958053         0.27142481482801556\n+253.016653119059        0.8938154160284864\n+271.027137265637        0.1375157398831428\n+284.047997947258        0.1709644868199992\n+325.985953604199        0.17320279707305308\n+327.03525664153         1.0\n+328.035343658           0.26668492712822667\n+345.046169172075        0.17928530473894783\n+359.09769204202         0.2008606829314974\n+360.028136457829        0.21741111817985187\n+387.002413998209        0.2632563912022333\n+387.071825807939        0.3071277807533759\n+402.055174276849        0.2538777644377674\n+461.090256355859        0.3629780005380489\n+462.091555156095        0.14179921776218857\n+475.072310244956        0.27595658697326014\n+476.143434019655        0.2650001066007337\n+477.140480798609        0.10059042565917092\n+490.12632015072         0.3700192231231476\n+491.122346777972        0.20934752539009766\n+549.162553092955        0.5034364902937184\n+563.144239233773        0.2590935918637112\n+624.183209223215        0.1451500409395642\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C020\n+RETENTION_TIME: 687.99\n+RETENTION_INDEX: 4135.089286\n+CHARGE: -1\n+NUM PEAKS: 24\n+194.053642118165        0.17149988589657134\n+212.061782732582        0.3408023706085241\n+267.034768010352        0.14461973681405008\n+267.068824022318        0.9059077886912253\n+310.970568727813        0.10207376981761378\n+325.055731606087        0.2492913881575955\n+327.965549188207        0.17771113517875153\n+339.03821058645         0.2164141235131004\n+342.996634492902        0.12864401601229136\n+345.115297423962        0.12858631941521836\n+358.067243216398        0.11516054657713713\n+361.025211906011        0.21277786650130676\n+388.003000430725        0.1117630757647746\n+388.073272089579        0.1379380986368877\n+399.005054559559        0.13523814605127854\n+401.984326631505        0.18236859257167579\n+402.98179623463   
      0.11632404563967355\n+416.036473280551        0.23580021278191537\n+417.033665098569        0.13482894754843827\n+430.088321970134        0.3515024935084798\n+431.085366629672        0.22490708219361874\n+475.14184210128         1.0\n+565.143723544965        0.11382519851041557\n+625.181479977537        0.1264407374154073\n+\n'
b
diff -r 000000000000 -r 0a0529822d91 test-data/msp_json_score_out.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/msp_json_score_out.json Mon Sep 02 12:12:30 2024 +0000
[
b'@@ -0,0 +1,1 @@\n+{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C20H12", "inchikey": "CSHWQDPOILHKBI-UHFFFAOYSA-N", "smiles": "C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Perylene", "retention_time": null, "retention_index": 2886.9, "precursor_mz": 252.09323, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "251.08595400000002", "peak_comments": {"252.09323": "Theoretical m/z 252.093354, Mass diff 0 (0.49 ppm), SMILES C1=CC=2C=CC=C3C4=CC=CC5=CC=CC(C(=C1)C23)=C54, Annotation [C20H12]+, Rule of HR False"}, "num_peaks": "3", "peaks_json": [[250.07765, 0.3282529462971431], [252.09323, 1.0], [253.09656, 0.20573802940517583]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "YNPNZTXNASCQKK-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Phenanthrene", "retention_time": null, "retention_index": 1832.9, "precursor_mz": 178.0775, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "177.070224", "peak_comments": {"176.062": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06982": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.0775": "Theoretical m/z 178.077698, Mass diff 0 (1.11 ppm), SMILES C=1C=CC2=C(C1)C=CC=3C=CC=CC32, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "5", "peaks_json": [[152.0619, 0.1657993569424221], [176.062, 0.24558560966311757], [177.06982, 0.12764433529926775], [178.0775, 
1.0], [179.08078, 0.16394988149600653]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "MWPLVEDNUUSJAV-UHFFFAOYSA-N", "smiles": "C1=CC2=CC3=C(C=CC=C3)C=C2C=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Anthracene", "retention_time": null, "retention_index": 1844.4, "precursor_mz": 178.07754, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "177.070264", "peak_comments": {"176.06204": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06984": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.07754": "Theoretical m/z 178.077698, Mass diff 0 (0.89 ppm), SMILES C=1C=CC=2C=C3C=CC=CC3=CC2C1, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "5", "peaks_json": [[152.06195, 0.12450313104470498], [176.06204, 0.23295403420236208], [177.06984, 0.1074344883724439], [178.07754, 1.0], [179.08081, 0.1616741186784917]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C12H10", "inchikey": "CWRYPZZKDGJXCA-UHFFFAOYSA-N", "smiles": "C1CC2=C3C1=CC=CC3=CC=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Acenaphthene", "retention_time": null, "retention_index": 1528.3, "precursor_mz": 154.07741, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "153.070134", "peak_comments": {"151.05418": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "153.06969": "Theoretical m/z 153.070425, Mass diff 0 (0 ppm), Formula C12H9", "154.07741": "Theoretical m/z 154.077698, Mass diff 0 (1.87 ppm), SMILES C=1C=C2C=CC=C3C2=C(C1)CC3, Annotation [C12H10]+, Rule of HR False"}, 
"num_peaks":'..b' [0.9999143345949728], [0.9997657574906329], [0.9998978216960445], [0.9998945389522197], [0.9998864702679015], [0.9998862932948742], [0.9998333198637035], [0.9999279064347144], [0.9996385856321696], [0.9998650587879883], [0.9998641224220777], [0.99987875060921], [0.9998812801923874], [0.9997646555357158], [0.9997240324384985], [0.9998189007203114], [0.999891784581221], [0.9996585120698334], [0.999887045678266], [0.9998438715475372], [0.9998448729802681], [0.9998252844554547], [0.9999519822614167], [0.9992598782046618], [0.999940527506364], [0.9994611604785356], [0.9998261356872763], [0.9999142880218148], [0.9998632393464923], [0.9999800325934705], [0.999953034711225], [0.9999830720999154], [0.9999521489939208], [0.9999764925265916], [0.9999360249560456], [0.9999185377955141], [0.9999166751683058], [0.999912528345136], [0.9999484239845527], [0.9999150963462649], [0.9999637742975606], [0.9999472081654047], [0.9999458958056243], [0.9999364563588813], [0.99991396473093], [0.9998051155361255], [0.9998902184904653], [0.9999441687501256], [0.999990621005114], [0.999902199327124], [0.9999678879184496], [0.9999804943664975], [0.9999456092231348], [0.9999471958183924], [0.9999438042647292], [0.9999771700128398], [0.999916039099827], [0.999937794543087], [0.9999375493608876], [0.999630889901168], [0.999965722754082], [0.9999325420733577], [0.9999489552433235], [0.9999207122711702], [0.9997186208622122], [0.9997137955070117], [0.9996977227655632], [0.9999197074673556], [0.9997368186457503], [0.9999621420048419], [0.9999158530438923], [0.999925086238387], [0.9999528717835519], [0.9999669311424155], [0.9993219119602206], [0.9999524930043491], [0.9995225174825454], [0.9997656549123947], [0.9998640265336487], [0.9998971550260584], [0.9999340419233629], [0.9999309770772353], [0.9999780248889175], [0.9999463803531405], [0.9999439531371518], [0.9998852503549406], [0.9997824868782953], [0.999876667232338], [0.9998831499614002], [0.9999091699166418], 
[0.9999229898310841], [0.9998913014769008], [0.9998498334271463], [0.9999453850737525], [0.9998980307890846], [0.9998584584943488], [0.9998552218484892], [0.999827532061527], [0.9999031728413462], [0.9999407256436771], [0.9997492983547175], [0.9999260627597029], [0.9999397513841002], [0.9998210761389577], [0.9998883440736017], [0.9998578406452548], [0.9998995412411811], [0.9998867920570471], [0.9998666662431963], [0.9999846608780388], [0.9997223559872718], [0.9999001145470334], [0.9998358521910925], [0.9998634971033604], [0.999863197449819], [0.9997480726466655], [0.9997670409043906], [0.9996593245437297], [0.999894345839193], [0.9997723383195918], [0.9999299475127384], [0.9999093038833364], [0.9998485852787227], [0.9998359115954737], [0.9999591724681508], [0.999209857877817], [0.9999545775746143], [0.9995653841932775], [0.9997242844773115], [0.999927108318947], [0.9996612143292061], [0.9998834822602634], [0.9998260389853331], [0.9999075004534518], [0.9998137074601509], [0.9999444224449484], [0.9999296322652415], [0.9998889707549917], [0.9999212997981346], [0.9999685454119221], [0.9999153407874176], [0.9999481701910652], [0.9999504390489771], [0.999910257658444], [0.9999298715756636], [0.9997371240012387], [0.9998468805956285], [0.9998695311760358], [0.9999772579170608], [0.9998972102922338], [0.9999021561612373], [0.9998407099849175], [0.9999693614519108], [0.9998634005437494], [0.99987317070948], [0.9998956488540075], [0.9999377157916893], [0.9998803104525503], [0.9998222634232689], [0.9999475648330006], [0.9998245252974104], [0.9993321247846143], [0.9999682135584211], [0.9999375052189536], [0.9998777285688251], [0.9999603759478851], [0.999876051020765], [0.9998014664813272], [0.9997676240946334], [0.9999719953674675], [0.9996935807466338], [0.9998127459209668], [0.9997677432023466], [0.9999555494008183], [0.9998727755891867], [0.9999581188724604], [0.9995870044530937], [0.999934705288334], [0.9994901305170677], [0.9998579329591719]], "dtype": [["MS2DeepScore", 
"<f8"]]}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 0a0529822d91 test-data/ri_match_60.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ri_match_60.json Mon Sep 02 12:12:30 2024 +0000
[
b'@@ -0,0 +1,1 @@\n+{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "YNPNZTXNASCQKK-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"74.01508": "Theoretical m/z 74.01565, Mass diff 0 (0 ppm), Formula C6H2", "75.02295": "Theoretical m/z 75.023475, Mass diff 0 (0 ppm), Formula C6H3", "76.03075": "Theoretical m/z 76.0313, Mass diff 0 (0 ppm), Formula C6H4", "87.02295": "Theoretical m/z 87.023475, Mass diff 0 (0 ppm), Formula C7H3", "88.03076": "Theoretical m/z 88.0313, Mass diff 0 (0 ppm), Formula C7H4", "89.03857": "Theoretical m/z 89.039125, Mass diff 0 (0 ppm), Formula C7H5", "151.05415": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "175.05423": "Theoretical m/z 175.054775, Mass diff 0 (0 ppm), Formula C14H7", "176.062": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06982": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.0775": "Theoretical m/z 178.077698, Mass diff 0 (1.11 ppm), SMILES C=1C=CC2=C(C1)C=CC=3C=CC=CC32, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "19", "compound_name": "Phenanthrene", "retention_time": null, "retention_index": 1832.9, "precursor_mz": 178.0775, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.01508, 137808.0], [75.02295, 278714.0], [76.03075, 608417.0], [87.02295, 304266.0], [88.03076, 497050.0], [89.03857, 441168.0], [98.01511, 150478.0], [150.04633, 868927.0], [151.05415, 546351.0], [152.0619, 2275502.0], [153.06528, 276320.0], [169.06468, 272559.0], [174.04636, 365846.0], [175.05423, 272039.0], [176.062, 3370523.0], [177.06982, 1751846.0], [178.0775, 13724432.0], [179.08078, 2250119.0], [180.08412, 
138203.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "MWPLVEDNUUSJAV-UHFFFAOYSA-N", "smiles": "C1=CC2=CC3=C(C=CC=C3)C=C2C=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"74.0151": "Theoretical m/z 74.01565, Mass diff 0 (0 ppm), Formula C6H2", "75.02296": "Theoretical m/z 75.023475, Mass diff 0 (0 ppm), Formula C6H3", "76.03077": "Theoretical m/z 76.0313, Mass diff 0 (0 ppm), Formula C6H4", "87.02297": "Theoretical m/z 87.023475, Mass diff 0 (0 ppm), Formula C7H3", "88.03078": "Theoretical m/z 88.0313, Mass diff 0 (0 ppm), Formula C7H4", "89.0386": "Theoretical m/z 89.039125, Mass diff 0 (0 ppm), Formula C7H5", "91.05425": "Theoretical m/z 91.054775, Mass diff 0 (0 ppm), Formula C7H7", "151.05415": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "175.05423": "Theoretical m/z 175.054775, Mass diff 0 (0 ppm), Formula C14H7", "176.06204": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06984": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.07754": "Theoretical m/z 178.077698, Mass diff 0 (0.89 ppm), SMILES C=1C=CC=2C=C3C=CC=CC3=CC2C1, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "21", "compound_name": "Anthracene", "retention_time": null, "retention_index": 1844.4, "precursor_mz": 178.07754, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.0151, 117371.0], [75.02296, 245305.0], [76.03077, 632686.0], [87.02297, 236214.0], [88.03078, 347832.0], [89.0386, 507288.0], [91.05425, 130861.0], [126.04636, 128356.0], [149.04478, 126331.0], [150.04637, 641829.0], [151.05415, 403195.0], [152.06195, 1369833.0], [153.06534, 175290.0], [169.06471, 228827.0], [174.04639, 255716.0], [175.05423, 198784.0], [176.06204, 2563053.0], [177.06984, 1182037.0], [178.07754, 
11002398.0], [179.080'..b'320.0], [224.062, 181393.0], [226.07771, 1503774.0], [227.08588, 1198323.0], [228.09344, 2885510.0], [229.10123, 1791323.0], [230.10886, 17890056.0], [231.11214, 3536976.0], [232.11542, 320231.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10S", "inchikey": "YEUHHUCOSQOCIX-UHFFFAOYSA-N", "smiles": "S1C2=C(C=CC=C2)C2=C1C1=CC=CC=C1C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"91.05425": "Theoretical m/z 91.054775, Mass diff 0 (0 ppm), Formula C7H7", "163.05408": "Theoretical m/z 163.054775, Mass diff 0 (0 ppm), Formula C13H7", "187.05406": "Theoretical m/z 187.054775, Mass diff 0 (0 ppm), Formula C15H7", "189.06969": "Theoretical m/z 189.070425, Mass diff 0 (0 ppm), Formula C15H9", "201.06973": "Theoretical m/z 201.070425, Mass diff 0 (0 ppm), Formula C16H9", "233.04245": "Theoretical m/z 233.042496, Mass diff -0.001 (0 ppm), Formula C16H9S", "234.04965": "Theoretical m/z 234.049775, Mass diff 0 (0.53 ppm), SMILES S1C=2C=CC=CC2C=3C=CC=4C=CC=CC4C13, Annotation [C16H10S]+, Rule of HR False"}, "num_peaks": "23", "compound_name": "Benzo[b]naphtho[2,1-d]thiophene", "retention_time": null, "retention_index": 2419.3, "precursor_mz": 234.04965, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[91.05425, 49130.0], [93.52686, 61653.0], [94.53469, 76638.0], [104.01679, 126871.0], [116.01675, 112547.0], [117.02459, 99108.0], [162.04623, 54491.0], [163.05408, 136418.0], [164.06198, 56321.0], [187.05406, 273349.0], [188.06189, 200750.0], [189.06969, 453336.0], [190.07306, 77884.0], [200.06189, 135620.0], [201.06973, 74692.0], [202.07758, 425058.0], [203.08086, 72058.0], [226.07762, 63460.0], [232.03409, 769638.0], [233.04245, 382457.0], [234.04965, 4404102.0], [235.05304, 725574.0], [236.04547, 251450.0]]}, {"scannumber": "-1", 
"ionmode": "positive", "spectrumtype": "Centroid", "formula": "C17H12", "inchikey": "HAPOJKSPCGLOOD-UHFFFAOYSA-N", "smiles": "C1C2=CC=CC=C2C2=C1C=C1C=CC=CC1=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"163.05414": "Theoretical m/z 163.054775, Mass diff 0 (0 ppm), Formula C13H7", "187.05412": "Theoretical m/z 187.054775, Mass diff 0 (0 ppm), Formula C15H7", "189.06975": "Theoretical m/z 189.070425, Mass diff 0 (0 ppm), Formula C15H9", "211.05426": "Theoretical m/z 211.054775, Mass diff 0 (0 ppm), Formula C17H7", "213.0699": "Theoretical m/z 213.070425, Mass diff 0 (0 ppm), Formula C17H9", "215.0855": "Theoretical m/z 215.086075, Mass diff 0 (0 ppm), Formula C17H11", "216.09326": "Theoretical m/z 216.093354, Mass diff 0 (0.44 ppm), SMILES C=1C=CC=2C=C3C(=CC2C1)C=4C=CC=CC4C3, Annotation [C17H12]+, Rule of HR False"}, "num_peaks": "21", "compound_name": "2,3-Benzofluorene", "retention_time": null, "retention_index": 2257.5, "precursor_mz": 216.09326, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[93.52689, 122035.0], [94.53471, 241743.0], [95.03638, 42130.0], [105.52688, 47526.0], [106.53471, 171653.0], [107.03639, 60004.0], [107.5425, 180288.0], [108.03385, 97471.0], [163.05414, 68194.0], [187.05412, 167183.0], [188.06201, 86262.0], [189.06975, 239421.0], [190.07321, 37778.0], [211.05426, 117044.0], [212.06215, 60813.0], [213.0699, 808499.0], [214.07317, 202222.0], [215.0855, 3634570.0], [216.09326, 2535030.0], [217.09671, 444170.0], [218.09999, 37976.0]]}], "n_row": 7, "n_col": 7, "row": [0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6], "col": [0, 1, 0, 1, 2, 3, 2, 3, 4, 3, 4, 6, 5, 4, 6], "data": [[true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true]], "dtype": [["MetadataMatch_retention_index_difference_60.0_f0", 
"|b1"]]}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 0a0529822d91 test-data/usescore_json_score_out.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/usescore_json_score_out.json Mon Sep 02 12:12:30 2024 +0000
[
b'@@ -0,0 +1,1 @@\n+{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "YNPNZTXNASCQKK-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "19", "compound_name": "Phenanthrene", "retention_time": null, "retention_index": 1832.9, "precursor_mz": 178.0775, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.01508, 137808.0], [75.02295, 278714.0], [76.03075, 608417.0], [87.02295, 304266.0], [88.03076, 497050.0], [89.03857, 441168.0], [98.01511, 150478.0], [150.04633, 868927.0], [151.05415, 546351.0], [152.0619, 2275502.0], [153.06528, 276320.0], [169.06468, 272559.0], [174.04636, 365846.0], [175.05423, 272039.0], [176.062, 3370523.0], [177.06982, 1751846.0], [178.0775, 13724432.0], [179.08078, 2250119.0], [180.08412, 138203.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "MWPLVEDNUUSJAV-UHFFFAOYSA-N", "smiles": "C1=CC2=CC3=C(C=CC=C3)C=C2C=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "21", "compound_name": "Anthracene", "retention_time": null, "retention_index": 1844.4, "precursor_mz": 178.07754, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.0151, 117371.0], [75.02296, 245305.0], [76.03077, 632686.0], [87.02297, 236214.0], [88.03078, 347832.0], [89.0386, 507288.0], [91.05425, 130861.0], [126.04636, 128356.0], [149.04478, 126331.0], [150.04637, 641829.0], [151.05415, 403195.0], [152.06195, 1369833.0], [153.06534, 175290.0], [169.06471, 228827.0], 
[174.04639, 255716.0], [175.05423, 198784.0], [176.06204, 2563053.0], [177.06984, 1182037.0], [178.07754, 11002398.0], [179.08081, 1778803.0], [180.08418, 132922.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10", "inchikey": "GVEPBJHOBDJJJI-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C3C2=CC=CC3=CC=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "17", "compound_name": "Fluoranthene", "retention_time": null, "retention_index": 2102.7, "precursor_mz": 202.07756, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[75.02299, 112456.0], [87.02298, 183640.0], [88.03079, 367434.0], [99.02296, 124952.0], [100.03078, 376079.0], [101.03863, 381288.0], [150.04642, 86059.0], [174.04634, 246963.0], [176.06194, 141676.0], [198.0464, 244370.0], [199.05429, 285767.0], [200.06207, 1958890.0], [201.06982, 1103710.0], [202.07756, 8104188.0], [203.08084, 1377015.0], [204.08421, 98067.0], [219.08043, 186623.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10", "inchikey": "BBEAQIROQSPTKN-UHFFFAOYSA-N", "smiles": "C1=CC2=C3C(C=CC4=CC=CC(C=C2)=C34)=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "16", "compound_name": "Pyrene", "retention_time": null, "retention_index": 2154.5, "precursor_mz": 202.07759, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[87.02299, 147113.0], [88.0308, 305149.0], [99.02298, 137042.0], [100.0308, 508914.0], [101.03864, 472094.0], [101.54032, 82430.0], [150.04637, 80741.0], [174.04631, 212706.0], [198.04643, 262925.0], [199.05429, 295144.0], [200.06209, 1960712.0], [201.06982, 
1270466.0], [202.'..b'"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C18H14", "inchikey": "XJKSTNDFUHDPQJ-UHFFFAOYSA-N", "smiles": "C1=CC=C(C=C1)C2=CC=C(C=C2)C3=CC=CC=C3", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "25", "compound_name": "para-Terphenyl", "retention_time": null, "retention_index": 2207.5, "precursor_mz": 230.10886, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[88.03077, 233962.0], [101.03861, 353544.0], [113.03854, 437845.0], [115.05423, 690291.0], [128.062, 293046.0], [151.05412, 266397.0], [152.06195, 961184.0], [153.06976, 409346.0], [176.06192, 232793.0], [189.06972, 408734.0], [200.0619, 342984.0], [201.06986, 238974.0], [202.07758, 1292332.0], [203.08093, 374439.0], [213.06973, 256631.0], [215.08548, 1220866.0], [216.08881, 217320.0], [224.062, 181393.0], [226.07771, 1503774.0], [227.08588, 1198323.0], [228.09344, 2885510.0], [229.10123, 1791323.0], [230.10886, 17890056.0], [231.11214, 3536976.0], [232.11542, 320231.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10S", "inchikey": "YEUHHUCOSQOCIX-UHFFFAOYSA-N", "smiles": "S1C2=C(C=CC=C2)C2=C1C1=CC=CC=C1C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "23", "compound_name": "Benzo[b]naphtho[2,1-d]thiophene", "retention_time": null, "retention_index": 2419.3, "precursor_mz": 234.04965, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[91.05425, 49130.0], [93.52686, 61653.0], [94.53469, 76638.0], [104.01679, 126871.0], [116.01675, 112547.0], [117.02459, 99108.0], [162.04623, 54491.0], [163.05408, 136418.0], [164.06198, 
56321.0], [187.05406, 273349.0], [188.06189, 200750.0], [189.06969, 453336.0], [190.07306, 77884.0], [200.06189, 135620.0], [201.06973, 74692.0], [202.07758, 425058.0], [203.08086, 72058.0], [226.07762, 63460.0], [232.03409, 769638.0], [233.04245, 382457.0], [234.04965, 4404102.0], [235.05304, 725574.0], [236.04547, 251450.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C17H12", "inchikey": "HAPOJKSPCGLOOD-UHFFFAOYSA-N", "smiles": "C1C2=CC=CC=C2C2=C1C=C1C=CC=CC1=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "21", "compound_name": "2,3-Benzofluorene", "retention_time": null, "retention_index": 2257.5, "precursor_mz": 216.09326, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[93.52689, 122035.0], [94.53471, 241743.0], [95.03638, 42130.0], [105.52688, 47526.0], [106.53471, 171653.0], [107.03639, 60004.0], [107.5425, 180288.0], [108.03385, 97471.0], [163.05414, 68194.0], [187.05412, 167183.0], [188.06201, 86262.0], [189.06975, 239421.0], [190.07321, 37778.0], [211.05426, 117044.0], [212.06215, 60813.0], [213.0699, 808499.0], [214.07317, 202222.0], [215.0855, 3634570.0], [216.09326, 2535030.0], [217.09671, 444170.0], [218.09999, 37976.0]]}], "n_row": 7, "n_col": 7, "row": [0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6], "col": [0, 1, 0, 1, 2, 3, 2, 3, 4, 3, 4, 6, 5, 4, 6], "data": [[true, 0.9999999999999999], [true, 0.9856430666837027], [true, 0.9856430666837027], [true, 0.9999999999999999], [true, 1.0], [true, 0.9745244470343223], [true, 0.9745244470343223], [true, 1.0], [true, 0.5206466095078924], [true, 0.5206466095078924], [true, 1.0], [true, 0.008969078696503796], [true, 1.0], [true, 0.008969078696503796], [true, 0.9999999999999999]], "dtype": [["MetadataMatch_retention_index_difference_60.0_f0", "|b1"], ["MS2DeepScore", 
"<f8"]]}\n\\ No newline at end of file\n'