Repository 'spec2vec_similarity'
hg clone https://toolshed.g2.bx.psu.edu/repos/recetox/spec2vec_similarity

Changeset 0:721a4e666191 (2023-06-27)
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit f79a5b51599254817727bc9028b9797ea994cb4e
added:
macros.xml
spec2vec_similarity.xml
test-data/RECETOX_Exposome_pesticides_HR_MS_normalized_20220323.msp
test-data/inp_filtered_library.msp
test-data/inp_filtered_spectra.msp
test-data/model.json
test-data/model_100.json
test-data/model_vector_size_100.json
test-data/ri_match_60.json
test-data/s2v_scores_test1_out.json
test-data/s2v_scores_test2_out.json
test-data/weights_100.binary
b
diff -r 000000000000 -r 721a4e666191 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Jun 27 14:30:10 2023 +0000
[
@@ -0,0 +1,78 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.8.0</token>
+
+    <!-- Macro creator: declares two persons and one organization as the tool's creators.
+         Pulled into a tool with an expand element naming this macro. -->
+    <xml name="creator">
+        <creator>
+            <person
+                givenName="Maksym"
+                familyName="Skoryk"
+                url="https://github.com/maximskorik"
+                identifier="0000-0003-2056-8018" />
+            <person
+                givenName="Helge"
+                familyName="Hecht"
+                url="https://github.com/hechth"
+                identifier="0000-0001-6744-996X" />
+            <organization
+                url="https://www.recetox.muni.cz/"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                name="RECETOX MUNI" />
+        </creator>
+    </xml>
+
+    <!-- Macro edam: despite its name, it expands to an xrefs element holding a single
+         bio.tools cross-reference (spec2vec). -->
+    <xml name="edam">
+        <xrefs>
+            <xref type="bio.tools">spec2vec</xref>
+        </xrefs>
+    </xml>
+
+    <!-- Macro input_param: a conditional named scores. With use_scores enabled the user
+         selects an existing scores JSON file; otherwise query and reference spectra files. -->
+    <xml name="input_param">
+        <conditional name="scores">
+            <param name="use_scores" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE"
+                   label="Use Scores Object"/>
+            <when value="TRUE">
+                <param name="scores_in" type="data" format="json" label="Scores object"
+                    help="Scores objects calculated previously using one of the matchms similarity tools." />
+            </when>
+            <when value="FALSE">
+                <param name="queries" type="data" format="msp,mgf" label="Queries spectra"
+                    help="Query mass spectra to match against references."/>
+                <param name="references" type="data" format="msp,mgf" label="Reference spectra"
+                    help="Reference mass spectra to match against as library."/>
+            </when>
+        </conditional>
+    </xml>
+
+    <!-- Macro citations: a single DOI citation attached to the tool. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008724</citation>
+        </citations>
+    </xml>
+
+<!-- Token init_scores: Python/Cheetah snippet that defines the variable scores.
+     With use_scores enabled in the scores conditional, the object is loaded from the
+     selected JSON file; otherwise it is built from the reference and query files read
+     with load_from_msp. All parameters are addressed through the conditional
+     (scores.NAME) so the lookup does not rely on unprefixed access to nested parameters. -->
+<token name="@init_scores@">
+from matchms.importing import load_from_msp, scores_from_json
+from matchms import Scores
+#if $scores.use_scores
+scores = scores_from_json("${scores.scores_in}")
+#else
+scores = Scores(references=list(load_from_msp("${scores.references}")), queries=list(load_from_msp("${scores.queries}")), is_symmetric=False)
+#end if
+</token>
+        
+<!-- Token init_logger: Python snippet that sets the matchms logger level to WARNING. -->
+<token name="@init_logger@">
+from matchms import set_matchms_logger_level
+set_matchms_logger_level("WARNING")
+</token>
+
+<!-- Token init_model: Python/Cheetah snippet that defines the variable model.
+     It reads the model metadata JSON (model_metadata), loads the weights file
+     (model_weights) in the format named by the metadata key __weights_format, and
+     wraps both in a Word2VecLight instance. -->
+<token name="@init_model@">
+import json
+from spec2vec.serialization.model_importing import load_weights, Word2VecLight
+
+with open("${model_metadata}", "r", encoding="utf-8") as f:
+    model_config = json.load(f)
+
+## The mapfile_path entry is dropped before the model is built; metadata without it is tolerated.
+model_config.pop("mapfile_path", None)
+
+weights = load_weights("${model_weights}", model_config["__weights_format"])
+model = Word2VecLight(model_config, weights)
+</token>
+</macros>
b
diff -r 000000000000 -r 721a4e666191 spec2vec_similarity.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/spec2vec_similarity.xml Tue Jun 27 14:30:10 2023 +0000
[
@@ -0,0 +1,84 @@
+<tool id="spec2vec_similarity" name="spec2vec similarity" version="@TOOL_VERSION@+galaxy0" profile="21.09">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="creator"/>
+    <expand macro="edam" />
+
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">spec2vec</requirement>
+    </requirements>
+
+    <!-- Runs the generated python_wrapper configfile with python3.
+         The path is single-quoted so the shell treats it as one argument. -->
+    <command detect_errors="aggressive"><![CDATA[
+        python3 '${python_wrapper}'
+    ]]></command>
+<configfiles>
+<configfile name="python_wrapper">
+## Script body: set up logging and the model, build the Spec2Vec similarity,
+## compute one score layer for the scores object and write the result as JSON.
+@init_logger@
+@init_model@
+
+import numpy as np
+from matchms.filtering import normalize_intensities
+from spec2vec import Spec2Vec
+
+name = "Spec2Vec_${intensity_power}_${allow_missing_percentage}"
+similarity = Spec2Vec(
+    model,
+    intensity_weighting_power=${intensity_power},
+    allowed_missing_percentage=${allow_missing_percentage} * 100
+)
+
+@init_scores@
+
+## Intensities are normalized before scoring; the row/col indices of the
+## existing scores are passed on to sparse_array.
+normalized_references = np.asarray([normalize_intensities(spectrum) for spectrum in scores.references])
+normalized_queries = np.asarray([normalize_intensities(spectrum) for spectrum in scores.queries])
+
+layer = similarity.sparse_array(
+    references=normalized_references,
+    queries=normalized_queries,
+    idx_row=scores._scores.row,
+    idx_col=scores._scores.col,
+    is_symmetric=False,
+)
+
+scores._scores.add_sparse_data(scores._scores.row, scores._scores.col, layer, name)
+
+scores.filter_by_range(inplace=True, name=name, low=0)
+scores.to_json("$similarity_scores")
+</configfile>
+</configfiles>
+
+    <!-- Inputs: the spectra-or-scores conditional from the input_param macro, the model
+         metadata and weights files, and the two numeric Spec2Vec settings. -->
+    <inputs>
+        <expand macro="input_param" />
+        <param label="Model JSON file" name="model_metadata" type="data" format="json"
+            help="Model JSON file to use for Spec2Vec similarity computing."/>
+        <param label="Model NPY file" name="model_weights" type="data" format="binary"
+            help="Model NPY file to use for Spec2Vec similarity computing."/>
+        <param label="intensity_power" name="intensity_power" type="float" value="0.0"
+            help="Spectrum vectors are a weighted sum of the word vectors. The given word intensities will be raised to the given power. The default is 0, which means that no weighting will be done."/>
+        <param label="Maximum share of new peaks" name="allow_missing_percentage" type="float" value="0.1" max="1.0" min="0.0"
+            help="Maximum allowed share of the peaks that are new to the model in relation to the whole peak corpus."/>
+    </inputs>
+    <!-- Single JSON output; the wrapper script writes the scores object to it. -->
+    <outputs>
+        <data label="Spec2Vec scores of ${on_string}" name="similarity_scores" format="json"/>
+    </outputs>
+
+    <!-- Two tests: one builds scores from spectra files, the other starts from an existing
+         scores JSON. Both name their expected output with the file attribute. -->
+    <tests>
+        <test> <!-- TEST #1: Test Spec2Vec. -->
+            <param name="references" value="inp_filtered_library.msp" ftype="msp"/>
+            <param name="queries" value="inp_filtered_spectra.msp" ftype="msp"/>
+            <param name="model_metadata" value="model_100.json" ftype="json"/>
+            <param name="model_weights" value="weights_100.binary" ftype="auto"/>
+            <param name="allow_missing_percentage" value="1.0"/>
+            <output name="similarity_scores" file="s2v_scores_test1_out.json" ftype="json"/>
+        </test>
+        <test> <!-- TEST #2: Test Spec2Vec starting from an existing scores object. -->
+            <param name="use_scores" value="True"/>
+            <param name="scores_in" value="ri_match_60.json" ftype="json"/>
+            <param name="model_metadata" value="model_100.json" ftype="json"/>
+            <param name="model_weights" value="weights_100.binary" ftype="auto"/>
+            <param name="allow_missing_percentage" value="1.0"/>
+            <output name="similarity_scores" file="s2v_scores_test2_out.json" ftype="json"/>
+        </test>
+    </tests>
+
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 721a4e666191 test-data/RECETOX_Exposome_pesticides_HR_MS_normalized_20220323.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/RECETOX_Exposome_pesticides_HR_MS_normalized_20220323.msp Tue Jun 27 14:30:10 2023 +0000
[
b'@@ -0,0 +1,6548 @@\n+SCANNUMBER: 1161\n+PRECURSORTYPE: [M+H]+\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C4H10NO3PS\n+INCHIKEY: YASYVMFAVPKPKE-SECBINFHSA-N\n+INCHI: \n+SMILES: COP(=O)(N=C(O)C)SC\n+AUTHORS: Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)\n+INSTRUMENT: LC Orbitrap Fusion Tribrid MS\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+IONIZATION: ESI+\n+LICENSE: CC BY-NC\n+COMMENT: \n+COMPOUND_NAME: Acephate\n+RETENTION_TIME: 1.232997\n+PRECURSOR_MZ: 184.0194\n+COLLISION_ENERGY: \n+NUM PEAKS: 16\n+90.09368    0.029663134088936807\n+93.11512    0.03263470691876824\n+95.10279    0.02940016304204907\n+101.31465   0.030294264601467377\n+102.90688   0.03476477239855892\n+103.98039   0.03158282273121729\n+112.01607   0.3231651195203408\n+112.99994   1.0\n+115.00399   0.042969469061456336\n+124.98121   0.02424593052304941\n+128.97701   0.24214373997422883\n+132.57193   0.03550109132984458\n+135.84808   0.037552265495568934\n+142.99275   0.43177216188497647\n+147.94205   0.046019933205354094\n+173.5094    0.06187708733268467\n+\n+SCANNUMBER: 2257\n+PRECURSORTYPE: [M+H]+\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C12H11NO2\n+INCHIKEY: CVXBEEMKQHEXEN-UHFFFAOYSA-N\n+INCHI: \n+SMILES: CN=C(Oc1cccc2c1cccc2)O\n+AUTHORS: Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)\n+INSTRUMENT: LC Orbitrap Fusion Tribrid MS\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+IONIZATION: ESI+\n+LICENSE: CC BY-NC\n+COMMENT: \n+COMPOUND_NAME: Carbaryl\n+RETENTION_TIME: 5.259445\n+PRECURSOR_MZ: 202.0863\n+COLLISION_ENERGY: \n+NUM PEAKS: 1\n+145.06491   1.0         "Theoretical m/z 145.064787, Mass diff 0 (0.85 ppm), SMILES OC1=CC=CC=2C=CC=CC12, Annotation [C10H8O+H]+, Rule of HR True"\n+\n+SCANNUMBER: 1516\n+PRECURSORTYPE: [M+H]+\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C8H16NO5P\n+INCHIKEY: VEENJGZXVHKXNB-UHFFFAOYSA-N\n+INCHI: \n+SMILES: COP(=O)(OC(=CC(=O)N(C)C)C)OC\n+AUTHORS: Biomarker Analytical Laboratories, RECETOX, Masaryk 
University (CZ)\n+INSTRUMENT: LC Orbitrap Fusion Tribrid MS\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+IONIZATION: ESI+\n+LICENSE: CC BY-NC\n+COMMENT: \n+COMPOUND_NAME: Dicrotophos\n+RETENTION_TIME: 2.025499\n+PRECURSOR_MZ: 238.0844\n+COLLISION_ENERGY: \n+NUM PEAKS: 5\n+112.074     0.01124761836832089\n+112.07591   1.0         "Theoretical m/z 112.075687, Mass diff 0 (1.99 ppm), SMILES O=C(C=CC)N(C)C, Annotation [C6H11NO-H]+, Rule of HR True"\n+127.01563   0.35611747652157366     "Theoretical m/z 127.01547, Mass diff 0 (1.26 ppm), SMILES O=P(O)(OC)OC, Annotation [C2H7O4P+H]+, Rule of HR True"\n+193.02605   0.8706598300714133      "Theoretical m/z 193.026035, Mass diff 0 (0.08 ppm), SMILES O=CC=C(OP(=O)(OC)OC)C, Annotation [C6H11O5P-H]+, Rule of HR True"\n+238.08437   0.32776190727646287     "Theoretical m/z 238.083891, Mass diff 0 (2.01 ppm), SMILES O=C(C=C(OP(=O)(OC)OC)C)N(C)C, Annotation [C8H16NO5P+H]+, Rule of HR True"\n+\n+SCANNUMBER: 1865\n+PRECURSORTYPE: [M+H]+\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C5H12NO3PS2\n+INCHIKEY: MCWXGJITAZMZEV-UHFFFAOYSA-N\n+INCHI: \n+SMILES: CN=C(CSP(=S)(OC)OC)O\n+AUTHORS: Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)\n+INSTRUMENT: LC Orbitrap Fusion Tribrid MS\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+IONIZATION: ESI+\n+LICENSE: CC BY-NC\n+COMMENT: \n+COMPOUND_NAME: Dimethoate\n+RETENTION_TIME: 2.866696\n+PRECURSOR_MZ: 230.0072\n+COLLISION_ENERGY: \n+NUM PEAKS: 8\n+88.0219     0.10126528522417098     "Theoretical m/z 88.021549, Mass diff 0 (3.99 ppm), SMILES SCC=NC, Annotation [C3H7NS-H]+, Rule of HR True"\n+124.98233   0.03394816737947091     "Theoretical m/z 124.982067, Mass diff 0 (2.11 ppm), SMILES S=P(OC)OC, Annotation [C2H7O2PS-H]+, Rule of HR True"\n+142.99275   0.13332015000924125     "Theoretical m/z 142.993177, Mass diff 0 (0 ppm), Formula C2H8O3PS"\n+156.95422   0.014917466667331371    "Theoretical m/z 156.954136, Mass diff 0 (0.54 ppm), SMILES S=P(S)(OC)OC, Annotation [C2H7O2PS2-H]+, Rule of 
HR True"\n+170.97      0.2633444689954621      "Theoretical m/z 170.969791, Mass diff 0 (1.22 ppm), SMILES S=P(OC'..b'cal Laboratories, RECETOX, Masaryk University (CZ)\n+INSTRUMENT: LC Orbitrap Fusion Tribrid MS\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+IONIZATION: ESI+\n+LICENSE: CC BY-NC\n+COMMENT: \n+COMPOUND_NAME: Mexacarbate\n+RETENTION_TIME: 1.682191\n+PRECURSOR_MZ: 223.1443\n+COLLISION_ENERGY: \n+NUM PEAKS: 5\n+134.07283   0.048004709909413995    "Theoretical m/z 134.072623, Mass diff 0 (1.55 ppm), SMILES O(C=1C=C(C=C(C1)C)C)C, Annotation [C9H12O-2H]+, Rule of HR False"\n+136.07611   0.474709014573502       "Theoretical m/z 136.076239, Mass diff 0 (0 ppm), Formula C8H10NO"\n+150.092     0.02866330157050705     "Theoretical m/z 150.091343, Mass diff 0.001 (4.38 ppm), SMILES OC1=CC=C(C(=C1)C)N(C)C, Annotation [C9H13NO-H]+, Rule of HR True"\n+151.09932   1.0         "Theoretical m/z 151.099168, Mass diff 0 (1.01 ppm), SMILES OC1=CC=C(C(=C1)C)N(C)C, Annotation [C9H13NO]+, Rule of HR False"\n+166.12282   0.02811286892205852     "Theoretical m/z 166.122633, Mass diff 0 (1.13 ppm), SMILES OC=1C=C(C(=C(C1)C)N(C)C)C, Annotation [C10H15NO+H]+, Rule of HR True"\n+\n+SCANNUMBER: 3999\n+PRECURSORTYPE: [M+H]+\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C19H21N2OCl\n+INCHIKEY: OGYFATSSENRIKG-UHFFFAOYSA-N\n+INCHI: \n+SMILES: Clc1ccc(cc1)CN(C(=Nc1ccccc1)O)C1CCCC1\n+AUTHORS: Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)\n+INSTRUMENT: LC Orbitrap Fusion Tribrid MS\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+IONIZATION: ESI+\n+LICENSE: CC BY-NC\n+COMMENT: \n+COMPOUND_NAME: Monceren\n+RETENTION_TIME: 7.14553\n+PRECURSOR_MZ: 329.1426\n+COLLISION_ENERGY: \n+NUM PEAKS: 5\n+89.03881    0.014711534999784734    "Theoretical m/z 89.038575, Mass diff 0 (2.63 ppm), SMILES C=1C=CC(=CC1)C, Annotation [C7H8-3H]+, Rule of HR True"\n+94.06543    0.016966589174607548    "Theoretical m/z 94.065123, Mass diff 0 (3.27 ppm), SMILES NC=1C=CC=CC1, Annotation [C6H7N+H]+, Rule of 
HR True"\n+106.06545   0.011922830429775924    "Theoretical m/z 106.065123, Mass diff 0 (3.09 ppm), SMILES N(=C)C=1C=CC=CC1, Annotation [C7H7N+H]+, Rule of HR True"\n+125.01307   0.013678447019393882\n+125.01532   1.0         "Theoretical m/z 125.015255, Mass diff 0 (0.52 ppm), SMILES ClC1=CC=C(C=C1)C, Annotation [C7H7Cl-H]+, Rule of HR True"\n+\n+SCANNUMBER: 2271\n+PRECURSORTYPE: [M+H]+\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C16H16N2O4\n+INCHIKEY: WZJZMXBKUWKXTQ-UHFFFAOYSA-N\n+INCHI: \n+SMILES: CCOC(=Nc1cccc(c1)OC(=Nc1ccccc1)O)O\n+AUTHORS: Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)\n+INSTRUMENT: LC Orbitrap Fusion Tribrid MS\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+IONIZATION: ESI+\n+LICENSE: CC BY-NC\n+COMMENT: \n+COMPOUND_NAME: Desmedipham\n+RETENTION_TIME: 6.430396\n+PRECURSOR_MZ: 301.1192\n+COLLISION_ENERGY: \n+NUM PEAKS: 3\n+136.03947   0.2736671949482495      "Theoretical m/z 136.03931, Mass diff 0 (1.18 ppm), SMILES OC(O)=NC=1C=CC=CC1, Annotation [C7H7NO2-H]+, Rule of HR True"\n+154.04993   0.15474967323186417     "Theoretical m/z 154.049864, Mass diff 0 (0.43 ppm), SMILES OC(O)=NC=1C=CC=C(O)C1, Annotation [C7H7NO3+H]+, Rule of HR True"\n+182.08162   1.0         "Theoretical m/z 182.081175, Mass diff 0 (2.45 ppm), SMILES OC(=NC=1C=CC=C(O)C1)OCC, Annotation [C9H11NO3+H]+, Rule of HR True"\n+\n+SCANNUMBER: 2458\n+PRECURSORTYPE: [M+H]+\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C16H16N2O4\n+INCHIKEY: IDOWTHOLJBTAFI-UHFFFAOYSA-N\n+INCHI: \n+SMILES: COC(=Nc1cccc(c1)OC(=Nc1cccc(c1)C)O)O\n+AUTHORS: Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)\n+INSTRUMENT: LC Orbitrap Fusion Tribrid MS\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+IONIZATION: ESI+\n+LICENSE: CC BY-NC\n+COMMENT: \n+COMPOUND_NAME: Phenmedipham\n+RETENTION_TIME: 6.570995\n+PRECURSOR_MZ: 301.1185\n+COLLISION_ENERGY: \n+NUM PEAKS: 2\n+136.03947   0.36898395493981717     "Theoretical m/z 136.03931, Mass diff 0 (1.18 ppm), SMILES 
OC(O)=NC=1C=CC=CC1, Annotation [C7H7NO2-H]+, Rule of HR True"\n+168.06587   1.0         "Theoretical m/z 168.065519, Mass diff 0 (2.09 ppm), SMILES OC(=NC=1C=CC=C(O)C1)OC, Annotation [C8H9NO3+H]+, Rule of HR True"\n+\n'
b
diff -r 000000000000 -r 721a4e666191 test-data/inp_filtered_library.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inp_filtered_library.msp Tue Jun 27 14:30:10 2023 +0000
[
b'@@ -0,0 +1,4205 @@\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C20H12\n+INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N\n+SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Perylene\n+RETENTION_TIME: None\n+RETENTION_INDEX: 2886.9\n+PRECURSOR_MZ: 252.09323\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 251.08595400000002\n+NUM PEAKS: 3\n+250.07765   0.3282529462971431\n+252.09323   1.0         "Theoretical m/z 252.093354, Mass diff 0 (0.49 ppm), SMILES C1=CC=2C=CC=C3C4=CC=CC5=CC=CC(C(=C1)C23)=C54, Annotation [C20H12]+, Rule of HR False"\n+253.09656   0.20573802940517583\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C14H10\n+INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N\n+SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Phenanthrene\n+RETENTION_TIME: None\n+RETENTION_INDEX: 1832.9\n+PRECURSOR_MZ: 178.0775\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 177.070224\n+NUM PEAKS: 5\n+152.0619    0.1657993569424221\n+176.062     0.24558560966311757     "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8"\n+177.06982   0.12764433529926775     "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9"\n+178.0775    1.0         "Theoretical m/z 178.077698, Mass diff 0 (1.11 ppm), SMILES C=1C=CC2=C(C1)C=CC=3C=CC=CC32, Annotation [C14H10]+, Rule of HR False"\n+179.08078   0.16394988149600653\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C14H10\n+INCHIKEY: MWPLVEDNUUSJAV-UHFFFAOYSA-N\n+SMILES: C1=CC2=CC3=C(C=CC=C3)C=C2C=C1\n+AUTHORS: Price et al., RECETOX, Masaryk University 
(CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Anthracene\n+RETENTION_TIME: None\n+RETENTION_INDEX: 1844.4\n+PRECURSOR_MZ: 178.07754\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 177.070264\n+NUM PEAKS: 5\n+152.06195   0.12450313104470498\n+176.06204   0.23295403420236208     "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8"\n+177.06984   0.1074344883724439      "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9"\n+178.07754   1.0         "Theoretical m/z 178.077698, Mass diff 0 (0.89 ppm), SMILES C=1C=CC=2C=C3C=CC=CC3=CC2C1, Annotation [C14H10]+, Rule of HR False"\n+179.08081   0.1616741186784917\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C12H10\n+INCHIKEY: CWRYPZZKDGJXCA-UHFFFAOYSA-N\n+SMILES: C1CC2=C3C1=CC=CC3=CC=C2\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Acenaphthene\n+RETENTION_TIME: None\n+RETENTION_INDEX: 1528.3\n+PRECURSOR_MZ: 154.07741\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 153.070134\n+NUM PEAKS: 4\n+151.05418   0.10238389021994407     "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7"\n+152.06194   0.4817565861859871\n+153.06969   1.0         "Theoretical m/z 153.070425, Mass diff 0 (0 ppm), Formula C12H9"\n+154.07741   0.6474388804646675      "Theoretical m/z 154.077698, Mass diff 0 (1.87 ppm), SMILES C=1C=C2C=CC=C3C2=C(C1)CC3, Annotation [C12H10]+, Rule of HR False"\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C16H10\n+INCHIKEY: GVEPBJHOBDJJJI-UHFFFAOYSA-N\n+SMILES: C1=CC2=C(C=C1)C1=C3C2=CC=CC3=CC=C1\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: 
Fluoranthene\n+RETENTION_TIME: None\n+RETENTION_INDEX: 2102.7\n+PRECURSOR_MZ: 202.07756\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n'..b'  "Theoretical m/z 235.066148, Mass diff 0 (0.18 ppm), SMILES O=C(NC1=CC=CC=C1)C2=C(OCCS2)C, Annotation [C12H13NO2S]+, Rule of HR False"\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C13H18O5S\n+INCHIKEY: IRCMYGHHKLLGHV-UHFFFAOYSA-N\n+SMILES: CCOC1C(C2=C(O1)C=CC(=C2)OS(=O)(=O)C)(C)C\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Ethofumesate\n+RETENTION_TIME: None\n+RETENTION_INDEX: 1954.4\n+PRECURSOR_MZ: 286.08679\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 285.079514\n+NUM PEAKS: 13\n+79.05419    0.1392834489952906      "Theoretical m/z 79.054228, Mass diff 0 (0.48 ppm), SMILES C1=CC=CC=C1, Annotation [C6H6+H]+, Rule of HR True"\n+91.0542     0.1456948233889637      "Theoretical m/z 91.054775, Mass diff 0 (0 ppm), Formula C7H7"\n+105.06984   0.29471655075385655     "Theoretical m/z 105.070425, Mass diff 0 (0 ppm), Formula C8H9"\n+115.05416   0.14265948876449097     "Theoretical m/z 115.054229, Mass diff 0 (0.6 ppm), SMILES C1=CC=C(C=C1)C(C)C, Annotation [C9H12-5H]+, Rule of HR True"\n+133.0647    0.3868381358475808      "Theoretical m/z 133.064798, Mass diff 0 (0.73 ppm), SMILES OC=1C=CC=C(C=1)C(C)C, Annotation [C9H12O-3H]+, Rule of HR True"\n+137.05962   0.565607729176301       "Theoretical m/z 137.060255, Mass diff 0 (0 ppm), Formula C8H9O2"\n+161.0596    1.0         "Theoretical m/z 161.059701, Mass diff 0 (0.63 ppm), SMILES OC=1C=CC=2OCC(C=2(C=1))(C)C, Annotation [C10H12O2-3H]+, Rule of HR True"\n+162.06293   0.1395183569693118\n+163.07518   0.1571750863529426      "Theoretical m/z 163.075351, Mass diff 0 (1.05 ppm), SMILES OC=1C=CC=2OCC(C=2(C=1))(C)C, Annotation [C10H12O2-H]+, Rule of HR True"\n+179.07016   0.3858738890199595      
"Theoretical m/z 179.07027, Mass diff 0 (0.62 ppm), SMILES OC=1C=CC=2OC(O)C(C=2(C=1))(C)C, Annotation [C10H12O3-H]+, Rule of HR True"\n+207.1015    0.9146502804597079      "Theoretical m/z 207.101566, Mass diff 0 (0.32 ppm), SMILES OC=1C=CC=2OC(OCC)C(C=2(C=1))(C)C, Annotation [C12H16O3-H]+, Rule of HR True"\n+208.1048    0.1224932435932507\n+286.08679   0.294378369815484       "Theoretical m/z 286.086945, Mass diff 0 (0.54 ppm), SMILES O=S(=O)(OC=1C=CC=2OC(OCC)C(C=2(C=1))(C)C)C, Annotation [C13H18O5S]+, Rule of HR False"\n+\n+SCANNUMBER: -1\n+IONMODE: positive\n+SPECTRUMTYPE: Centroid\n+FORMULA: C17H17N3OS\n+INCHIKEY: LMVPQMGRYSRMIW-KRWDZBQOSA-N\n+SMILES: CC1(C(=O)N(C(=N1)SC)NC2=CC=CC=C2)C3=CC=CC=C3\n+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n+IONIZATION: EI+\n+LICENSE: CC BY-NC\n+COMPOUND_NAME: Fenamidone\n+RETENTION_TIME: None\n+RETENTION_INDEX: 2516.1\n+PRECURSOR_MZ: 311.10815\n+ADDUCT: [M]+\n+COLLISION_ENERGY: 70eV\n+INSTRUMENT_TYPE: GC-EI-Orbitrap\n+CHARGE: 1\n+PARENT_MASS: 310.10087400000003\n+NUM PEAKS: 10\n+77.03851    0.1341728006141734      "Theoretical m/z 77.038578, Mass diff 0 (0.88 ppm), SMILES C1=CC=CC=C1, Annotation [C6H6-H]+, Rule of HR True"\n+91.04161    0.14196713486951465     "Theoretical m/z 91.041647, Mass diff 0 (0.41 ppm), SMILES NC1=CC=CC=C1, Annotation [C6H7N-2H]+, Rule of HR False"\n+103.05415   0.13231626786608644     "Theoretical m/z 103.054223, Mass diff 0 (0.71 ppm), SMILES C1=CC=C(C=C1)CC, Annotation [C8H10-3H]+, Rule of HR True"\n+206.07457   0.2477103476704148      "Theoretical m/z 206.07464, Mass diff 0 (0.34 ppm), SMILES N(=C(NN)SC)C(C1=CC=CC=C1)C, Annotation [C10H15N3S-3H]+, Rule of HR True"\n+210.11507   0.1254196447843151\n+237.10208   0.5638187350251782      "Theoretical m/z 237.102243, Mass diff 0 (0.69 ppm), SMILES O=C(NNC1=CC=CC=C1)C(C2=CC=CC=C2)C, Annotation [C15H16N2O-3H]+, Rule of HR True"\n+238.10997   0.737876380592742\n+239.11317   
0.12354824276317873\n+268.09      1.0         "Theoretical m/z 268.090295, Mass diff 0 (1.1 ppm), SMILES N(=C(NNC1=CC=CC=C1)S)C(C2=CC=CC=C2)C, Annotation [C15H17N3S-3H]+, Rule of HR True"\n+269.09351   0.15574634382295574\n+\n'
b
diff -r 000000000000 -r 721a4e666191 test-data/inp_filtered_spectra.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/inp_filtered_spectra.msp Tue Jun 27 14:30:10 2023 +0000
b
b'@@ -0,0 +1,1008 @@\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C019\n+RETENTION_TIME: 688.11\n+RETENTION_INDEX: 4135.446429\n+CHARGE: -1\n+NUM PEAKS: 21\n+164.985419247789        0.2949419846671587\n+179.033979756352        0.1975962017168221\n+194.04893073403         0.1080780080055076\n+248.988380501455        0.4192550107623055\n+251.037178293           0.1975188727945594\n+283.099929585291        0.24858700581303622\n+313.053040045895        0.15255184988804857\n+329.031653006854        0.12152318335456756\n+341.157248840923        0.33065016037294653\n+385.022947628725        0.11792404215662944\n+401.053681557414        0.5558197970588639\n+403.051400482668        0.10684249115159443\n+415.106669687654        1.0\n+416.107049345269        0.3343846089983622\n+417.104122333661        0.16072829222839785\n+489.124198650628        0.7880577919119076\n+535.10922525834         0.17951802003040962\n+550.163296442538        0.22193082425956692\n+551.161445828019        0.11285226875001307\n+564.146181690587        0.10968283775320925\n+623.183150220198        0.24882510323783946\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C008\n+RETENTION_TIME: 383.25\n+RETENTION_INDEX: 2436.111111\n+CHARGE: -1\n+NUM PEAKS: 10\n+167.06348032557         0.2012680241749571\n+169.042872715042        0.22527813564183466\n+185.073952424469        0.1146092209329471\n+224.061333736415        0.637856954108077\n+241.084284214072        0.5378362429201462\n+243.086918863664        0.8440889152136807\n+245.102418520421        1.0\n+257.113614660022        0.7014464407039914\n+258.111154671539        0.45403443749116\n+315.119168534318        0.2802677742648726\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C001\n+RETENTION_TIME: 268.99\n+RETENTION_INDEX: 1800.989583\n+CHARGE: -1\n+NUM PEAKS: 7\n+147.065597668017        0.20713460605355152\n+149.044686744287        0.15021454193335101\n+183.047097257536        
0.2756365347787095\n+257.066031671279        0.16716705800250425\n+273.097313808265        1.0\n+274.099806030141        0.1382750943677773\n+347.116266654718        0.10796331331103011\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C010\n+RETENTION_TIME: 271.76\n+RETENTION_INDEX: 1815.416667\n+CHARGE: -1\n+NUM PEAKS: 12\n+76.0323849976885        0.5337096603553363\n+107.129167494293        0.17750725429827036\n+120.05545372843         0.732323548309492\n+132.054968772294        0.6582449478277536\n+136.084083848357        0.18110012219874336\n+138.177598969977        0.10980088538234019\n+311.387345297053        0.3149229368865709\n+312.394070075839        1.0\n+313.396963036504        0.1996701454734163\n+329.397982197985        0.17215789049120694\n+330.403963356557        0.5441264876224288\n+331.407288294656        0.1075489761979763\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C009\n+RETENTION_TIME: 224.1\n+RETENTION_INDEX: 1606.578947\n+CHARGE: -1\n+NUM PEAKS: 5\n+128.088974937905        0.30238672398872746\n+142.104728421893        0.13125344469355718\n+230.102912184687        0.13629903834332716\n+246.13394892703         1.0\n+247.135560589937        0.1027800448922141\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C011\n+RETENTION_TIME: 322.28\n+RETENTION_INDEX: 2083.777778\n+CHARGE: -1\n+NUM PEAKS: 8\n+191.091545005862        0.3661694612326083\n+204.099524919261        0.1379655416395176\n+217.107439740029        0.8659676702439103\n+221.084188869749        0.13224939836684152\n+265.110639707297        0.12404535021125133\n+305.141591931944        1.0\n+306.142075983677        0.19894438269593562\n+318.149353523284        0.5161780338363837\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C013\n+RETENTION_TIME: 471.03\n+RETENTION_INDEX: 3168.684211\n+CHARGE: -1\n+NUM PEAKS: 6\n+78.0464517452347        0.13019193533313522\n+155.087332200892        
0.16981875175936348\n+207.101729466849        1.0\n+313.053036971607        0.12108100658431745\n+353.320009154806        0.2895861096886366\n+624.30876303981         0.14852029190801175\n'..b'349696378594456\n+196.992752093539        0.3065603768824057\n+197.974508055988        0.395078144825856\n+198.95441899853         0.20984023997622794\n+212.970014961884        0.312364140356306\n+213.006147303489        0.3451086913299261\n+214.98560992714         0.5058106319186024\n+220.957069324217        0.1741467403445935\n+222.936453968368        0.1925777417566011\n+229.019119616188        1.0\n+230.00074243328         0.4313373151032761\n+236.98832473252         0.17144823687024496\n+240.946899738586        0.37016492220221137\n+253.982966844011        0.2619026170220304\n+269.977845781938        0.369206087321791\n+270.975426915952        0.26702156040941716\n+270.993770069105        0.5558156567281477\n+297.079403794841        0.3276080388276794\n+298.934550867121        0.27505460413538213\n+328.980928284848        0.30053783536988743\n+330.960641905517        0.36106011190503906\n+372.953096157169        0.24565144306023193\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C018\n+RETENTION_TIME: 687.52\n+RETENTION_INDEX: 4133.690476\n+CHARGE: -1\n+NUM PEAKS: 9\n+209.011554808631        0.6525630494040975\n+209.029113642814        0.10839584784422365\n+210.990970491228        0.15000447517519525\n+226.041539764125        0.3130785916148705\n+227.039590062704        0.17080278851135594\n+265.020169474237        0.2407632085362116\n+281.051364215883        1.0\n+282.049803555434        0.21798058347517868\n+285.009414657485        0.12222513080126347\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C012\n+RETENTION_TIME: 687.83\n+RETENTION_INDEX: 4134.613095\n+CHARGE: -1\n+NUM PEAKS: 27\n+121.046741626167        0.12537265500763864\n+192.997735629267        0.1940732067111916\n+205.016711322548        
0.12056941717717026\n+209.098935410027        0.12559326156418593\n+213.05754958053         0.27142481482801556\n+253.016653119059        0.8938154160284864\n+271.027137265637        0.1375157398831428\n+284.047997947258        0.1709644868199992\n+325.985953604199        0.17320279707305308\n+327.03525664153         1.0\n+328.035343658           0.26668492712822667\n+345.046169172075        0.17928530473894783\n+359.09769204202         0.2008606829314974\n+360.028136457829        0.21741111817985187\n+387.002413998209        0.2632563912022333\n+387.071825807939        0.3071277807533759\n+402.055174276849        0.2538777644377674\n+461.090256355859        0.3629780005380489\n+462.091555156095        0.14179921776218857\n+475.072310244956        0.27595658697326014\n+476.143434019655        0.2650001066007337\n+477.140480798609        0.10059042565917092\n+490.12632015072         0.3700192231231476\n+491.122346777972        0.20934752539009766\n+549.162553092955        0.5034364902937184\n+563.144239233773        0.2590935918637112\n+624.183209223215        0.1451500409395642\n+\n+IONMODE: negative\n+SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C020\n+RETENTION_TIME: 687.99\n+RETENTION_INDEX: 4135.089286\n+CHARGE: -1\n+NUM PEAKS: 24\n+194.053642118165        0.17149988589657134\n+212.061782732582        0.3408023706085241\n+267.034768010352        0.14461973681405008\n+267.068824022318        0.9059077886912253\n+310.970568727813        0.10207376981761378\n+325.055731606087        0.2492913881575955\n+327.965549188207        0.17771113517875153\n+339.03821058645         0.2164141235131004\n+342.996634492902        0.12864401601229136\n+345.115297423962        0.12858631941521836\n+358.067243216398        0.11516054657713713\n+361.025211906011        0.21277786650130676\n+388.003000430725        0.1117630757647746\n+388.073272089579        0.1379380986368877\n+399.005054559559        0.13523814605127854\n+401.984326631505        0.18236859257167579\n+402.98179623463   
      0.11632404563967355\n+416.036473280551        0.23580021278191537\n+417.033665098569        0.13482894754843827\n+430.088321970134        0.3515024935084798\n+431.085366629672        0.22490708219361874\n+475.14184210128         1.0\n+565.143723544965        0.11382519851041557\n+625.181479977537        0.1264407374154073\n+\n'
b
diff -r 000000000000 -r 721a4e666191 test-data/model.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model.json Tue Jun 27 14:30:10 2023 +0000
[
b'@@ -0,0 +1,1 @@\n+{"vector_size": 300, "index_to_key": ["peak@95.05", "peak@91.05", "peak@92.05", "peak@115.05", "peak@125.02", "peak@89.04", "peak@173.51", "peak@93.06", "peak@105.07", "peak@105.05", "peak@155.06", "peak@111.04", "peak@120.04", "peak@110.07", "peak@106.07", "peak@118.07", "peak@127.02", "peak@119.06", "peak@119.09", "peak@110.05", "peak@137.02", "peak@149.02", "peak@142.07", "peak@94.07", "peak@107.05", "peak@103.05", "peak@138.08", "peak@96.06", "peak@85.05", "peak@96.04", "peak@158.98", "peak@117.06", "peak@129.07", "peak@139.01", "peak@143.06", "peak@129.01", "peak@120.08", "peak@146.06", "peak@128.06", "peak@119.05", "peak@123.00", "peak@104.05", "peak@131.06", "peak@123.04", "peak@105.04", "peak@163.03", "peak@109.07", "peak@138.99", "peak@117.07", "peak@144.06", "peak@170.10", "peak@110.06", "peak@95.09", "peak@145.06", "peak@85.08", "peak@147.08", "peak@137.06", "peak@151.03", "peak@133.06", "peak@97.04", "peak@165.05", "peak@114.07", "peak@175.03", "peak@132.08", "peak@139.03", "peak@107.09", "peak@99.00", "peak@141.01", "peak@140.03", "peak@113.02", "peak@158.05", "peak@113.08", "peak@109.08", "peak@122.06", "peak@130.07", "peak@168.09", "peak@156.07", "peak@141.07", "peak@167.07", "peak@153.07", "peak@136.04", "peak@165.07", "peak@98.06", "peak@161.06", "peak@164.03", "peak@93.07", "peak@128.05", "peak@157.08", "peak@116.06", "peak@172.99", "peak@86.04", "peak@116.03", "peak@124.08", "peak@118.05", "peak@134.06", "peak@116.05", "peak@150.02", "peak@174.97", "peak@135.08", "peak@184.12", "peak@128.08", "peak@129.05", "peak@100.05", "peak@99.07", "peak@152.06", "peak@148.04", "peak@134.10", "peak@152.07", "peak@121.04", "peak@176.04", "peak@130.03", "peak@94.04", "peak@147.94", "peak@182.08", "peak@121.07", "peak@134.07", "peak@136.08", "peak@186.08", "peak@168.07", "peak@122.10", "peak@123.06", "peak@124.06", "peak@125.01", "peak@91.03", "peak@102.04", "peak@183.06", "peak@91.04", "peak@125.06", "peak@90.03", "peak@130.04", 
"peak@126.01", "peak@165.10", "peak@93.03", "peak@108.08", "peak@139.07", "peak@148.08", "peak@125.08", "peak@191.06", "peak@122.07", "peak@113.04", "peak@108.06", "peak@191.07", "peak@183.08", "peak@133.09", "peak@159.09", "peak@158.08", "peak@144.08", "peak@114.09", "peak@143.07", "peak@161.10", "peak@150.09", "peak@108.04", "peak@170.07", "peak@132.07", "peak@153.08", "peak@107.07", "peak@163.08", "peak@145.05", "peak@145.10", "peak@131.09", "peak@133.10", "peak@209.06", "peak@120.06", "peak@178.05", "peak@145.03", "peak@172.97", "peak@186.97", "peak@157.09", "peak@164.07", "peak@168.02", "peak@164.04", "peak@133.05", "peak@132.04", "peak@177.05", "peak@102.05", "peak@166.07", "peak@167.09", "peak@190.04", "peak@155.09", "peak@189.05", "peak@142.08", "peak@185.07", "peak@147.07", "peak@160.08", "peak@141.06", "peak@135.04", "peak@131.07", "peak@95.06", "peak@149.06", "peak@159.04", "peak@162.07", "peak@208.96", "peak@130.08", "peak@155.07", "peak@106.03", "peak@98.10", "peak@123.02", "peak@219.03", "peak@109.03", "peak@172.96", "peak@139.05", "peak@124.98", "peak@113.00", "peak@142.99", "peak@147.04", "peak@169.10", "peak@99.04", "peak@146.07", "peak@125.05", "peak@176.03", "peak@141.02", "peak@180.10", "peak@102.06", "peak@136.01", "peak@226.17", "peak@215.03", "peak@147.06", "peak@184.99", "peak@99.08", "peak@164.11", "peak@198.08", "peak@208.13", "peak@165.06", "peak@151.11", "peak@192.10", "peak@308.00", "peak@223.01", "peak@156.09", "peak@109.10", "peak@155.00", "peak@132.96", "peak@125.00", "peak@208.10", "peak@171.03", "peak@159.07", "peak@158.04", "peak@159.97", "peak@127.10", "peak@163.04", "peak@142.12", "peak@216.03", "peak@171.04", "peak@148.09", "peak@158.07", "peak@180.03", "peak@144.07", "peak@148.11", "peak@121.09", "peak@127.99", "peak@140.05", "peak@217.02", "peak@87.08", "peak@166.12", "peak@206.08", "peak@179.07", "peak@107.06", "peak@242.08", "peak@162.04", "peak@150.01", "peak@160.11", "peak@110.10", "peak@115.08", "peak@86.10", 
"peak@138.09", "peak@223.08'..b': 1240, "peak@155.02": 1241, "peak@226.13": 1242, "peak@210.10": 1243, "peak@209.11": 1244, "peak@196.09": 1245, "peak@185.11": 1246, "peak@102.03": 1247, "peak@187.09": 1248, "peak@188.08": 1249, "peak@216.06": 1250, "peak@161.03": 1251, "peak@174.02": 1252, "peak@175.05": 1253, "peak@119.04": 1254, "peak@134.04": 1255, "peak@150.04": 1256, "peak@151.10": 1257, "peak@163.01": 1258, "peak@188.09": 1259, "peak@163.05": 1260, "peak@170.04": 1261, "peak@178.03": 1262, "peak@179.00": 1263, "peak@205.04": 1264, "peak@214.04": 1265, "peak@155.04": 1266, "peak@147.03": 1267, "peak@146.02": 1268, "peak@134.05": 1269, "peak@101.02": 1270, "peak@98.04": 1271, "peak@366.15": 1272, "peak@231.10": 1273, "peak@230.09": 1274, "peak@215.11": 1275, "peak@214.10": 1276, "peak@202.10": 1277, "peak@201.10": 1278, "peak@200.08": 1279, "peak@199.09": 1280, "peak@199.07": 1281, "peak@189.10": 1282, "peak@247.07": 1283, "peak@260.07": 1284, "peak@273.06": 1285, "peak@139.12": 1286, "peak@211.00": 1287, "peak@86.07": 1288, "peak@96.08": 1289, "peak@136.09": 1290, "peak@137.09": 1291, "peak@138.10": 1292, "peak@151.12": 1293, "peak@224.18": 1294, "peak@179.13": 1295, "peak@180.15": 1296, "peak@191.12": 1297, "peak@192.15": 1298, "peak@194.13": 1299, "peak@208.14": 1300, "peak@167.97": 1301, "peak@152.98": 1302, "peak@139.97": 1303, "peak@136.00": 1304, "peak@134.99": 1305, "peak@121.01": 1306, "peak@111.03": 1307, "peak@108.00": 1308, "peak@107.00": 1309, "peak@96.00": 1310, "peak@90.97": 1311, "peak@377.14": 1312, "peak@349.11": 1313, "peak@347.09": 1314, "peak@337.11": 1315, "peak@335.13": 1316, "peak@334.08": 1317, "peak@209.18": 1318, "peak@237.21": 1319, "peak@273.08": 1320, "peak@226.12": 1321, "peak@172.98": 1322, "peak@204.02": 1323, "peak@206.02": 1324, "peak@220.02": 1325, "peak@250.02": 1326, "peak@225.11": 1327, "peak@251.11": 1328, "peak@272.11": 1329, "peak@261.09": 1330, "peak@279.10": 1331, "peak@325.05": 1332, "peak@325.14": 1333, 
"peak@325.24": 1334, "peak@263.97": 1335, "peak@168.05": 1336, "peak@179.09": 1337, "peak@177.08": 1338, "peak@138.06": 1339, "peak@178.09": 1340, "peak@150.13": 1341, "peak@150.05": 1342, "peak@149.05": 1343, "peak@114.05": 1344, "peak@166.03": 1345, "peak@148.02": 1346, "peak@143.02": 1347, "peak@138.04": 1348, "peak@99.03": 1349, "peak@89.01": 1350, "peak@86.99": 1351, "peak@103.00": 1352, "peak@333.11": 1353, "peak@331.10": 1354, "peak@321.11": 1355, "peak@127.03": 1356, "peak@186.07": 1357, "peak@90.01": 1358, "peak@117.02": 1359, "peak@334.17": 1360, "peak@110.04": 1361, "peak@111.00": 1362, "peak@155.03": 1363, "peak@319.10": 1364, "peak@159.02": 1365, "peak@190.10": 1366, "peak@161.05": 1367, "peak@186.05": 1368, "peak@160.99": 1369, "peak@176.99": 1370, "peak@133.07": 1371, "peak@200.12": 1372, "peak@173.11": 1373, "peak@115.04": 1374, "peak@376.04": 1375, "peak@265.95": 1376, "peak@279.13": 1377, "peak@219.11": 1378, "peak@178.04": 1379, "peak@166.04": 1380, "peak@220.13": 1381, "peak@192.14": 1382, "peak@162.13": 1383, "peak@147.10": 1384, "peak@330.09": 1385, "peak@288.09": 1386, "peak@286.07": 1387, "peak@204.00": 1388, "peak@183.97": 1389, "peak@184.98": 1390, "peak@174.07": 1391, "peak@241.09": 1392, "peak@226.06": 1393, "peak@220.07": 1394, "peak@219.07": 1395, "peak@211.08": 1396, "peak@205.05": 1397, "peak@198.07": 1398, "peak@197.06": 1399, "peak@193.09": 1400, "peak@193.05": 1401, "peak@189.09": 1402, "peak@188.05": 1403, "peak@187.04": 1404, "peak@175.04": 1405, "peak@164.05": 1406, "peak@196.98": 1407, "peak@161.02": 1408, "peak@151.08": 1409, "peak@150.07": 1410, "peak@148.05": 1411, "peak@139.08": 1412, "peak@280.01": 1413, "peak@254.04": 1414, "peak@253.03": 1415, "peak@245.04": 1416, "peak@244.03": 1417, "peak@237.06": 1418, "peak@225.03": 1419, "peak@213.98": 1420, "peak@212.97": 1421, "peak@90.09": 1422}, "norms": null, "mapfile_path": null, "__numpys": [], "__scipys": [], "__ignoreds": [], "__recursive_saveloads": [], 
"__weights_format": "np.ndarray"}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 721a4e666191 test-data/model_100.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model_100.json Tue Jun 27 14:30:10 2023 +0000
[
b'@@ -0,0 +1,1 @@\n+{"vector_size": 100, "index_to_key": ["peak@95.05", "peak@91.05", "peak@92.05", "peak@115.05", "peak@125.02", "peak@89.04", "peak@173.51", "peak@93.06", "peak@105.07", "peak@105.05", "peak@155.06", "peak@111.04", "peak@120.04", "peak@110.07", "peak@106.07", "peak@118.07", "peak@127.02", "peak@119.06", "peak@119.09", "peak@110.05", "peak@137.02", "peak@149.02", "peak@142.07", "peak@94.07", "peak@107.05", "peak@103.05", "peak@138.08", "peak@96.06", "peak@85.05", "peak@96.04", "peak@158.98", "peak@117.06", "peak@129.07", "peak@139.01", "peak@143.06", "peak@129.01", "peak@120.08", "peak@146.06", "peak@128.06", "peak@119.05", "peak@123.00", "peak@104.05", "peak@131.06", "peak@123.04", "peak@105.04", "peak@163.03", "peak@109.07", "peak@138.99", "peak@117.07", "peak@144.06", "peak@170.10", "peak@110.06", "peak@95.09", "peak@145.06", "peak@85.08", "peak@147.08", "peak@137.06", "peak@151.03", "peak@133.06", "peak@97.04", "peak@165.05", "peak@114.07", "peak@175.03", "peak@132.08", "peak@139.03", "peak@107.09", "peak@99.00", "peak@141.01", "peak@140.03", "peak@113.02", "peak@158.05", "peak@113.08", "peak@109.08", "peak@122.06", "peak@130.07", "peak@168.09", "peak@156.07", "peak@141.07", "peak@167.07", "peak@153.07", "peak@136.04", "peak@165.07", "peak@98.06", "peak@161.06", "peak@164.03", "peak@93.07", "peak@128.05", "peak@157.08", "peak@116.06", "peak@172.99", "peak@86.04", "peak@116.03", "peak@124.08", "peak@118.05", "peak@134.06", "peak@116.05", "peak@150.02", "peak@174.97", "peak@135.08", "peak@184.12", "peak@128.08", "peak@129.05", "peak@100.05", "peak@99.07", "peak@152.06", "peak@148.04", "peak@134.10", "peak@152.07", "peak@121.04", "peak@176.04", "peak@130.03", "peak@94.04", "peak@147.94", "peak@182.08", "peak@121.07", "peak@134.07", "peak@136.08", "peak@186.08", "peak@168.07", "peak@122.10", "peak@123.06", "peak@124.06", "peak@125.01", "peak@91.03", "peak@102.04", "peak@183.06", "peak@91.04", "peak@125.06", "peak@90.03", "peak@130.04", 
"peak@126.01", "peak@165.10", "peak@93.03", "peak@108.08", "peak@139.07", "peak@148.08", "peak@125.08", "peak@191.06", "peak@122.07", "peak@113.04", "peak@108.06", "peak@191.07", "peak@183.08", "peak@133.09", "peak@159.09", "peak@158.08", "peak@144.08", "peak@114.09", "peak@143.07", "peak@161.10", "peak@150.09", "peak@108.04", "peak@170.07", "peak@132.07", "peak@153.08", "peak@107.07", "peak@163.08", "peak@145.05", "peak@145.10", "peak@131.09", "peak@133.10", "peak@209.06", "peak@120.06", "peak@178.05", "peak@145.03", "peak@172.97", "peak@186.97", "peak@157.09", "peak@164.07", "peak@168.02", "peak@164.04", "peak@133.05", "peak@132.04", "peak@177.05", "peak@102.05", "peak@166.07", "peak@167.09", "peak@190.04", "peak@155.09", "peak@189.05", "peak@142.08", "peak@185.07", "peak@147.07", "peak@160.08", "peak@141.06", "peak@135.04", "peak@131.07", "peak@95.06", "peak@149.06", "peak@159.04", "peak@162.07", "peak@208.96", "peak@130.08", "peak@155.07", "peak@106.03", "peak@98.10", "peak@123.02", "peak@219.03", "peak@109.03", "peak@172.96", "peak@139.05", "peak@124.98", "peak@113.00", "peak@142.99", "peak@147.04", "peak@169.10", "peak@99.04", "peak@146.07", "peak@125.05", "peak@176.03", "peak@141.02", "peak@180.10", "peak@102.06", "peak@136.01", "peak@226.17", "peak@215.03", "peak@147.06", "peak@184.99", "peak@99.08", "peak@164.11", "peak@198.08", "peak@208.13", "peak@165.06", "peak@151.11", "peak@192.10", "peak@308.00", "peak@223.01", "peak@156.09", "peak@109.10", "peak@155.00", "peak@132.96", "peak@125.00", "peak@208.10", "peak@171.03", "peak@159.07", "peak@158.04", "peak@159.97", "peak@127.10", "peak@163.04", "peak@142.12", "peak@216.03", "peak@171.04", "peak@148.09", "peak@158.07", "peak@180.03", "peak@144.07", "peak@148.11", "peak@121.09", "peak@127.99", "peak@140.05", "peak@217.02", "peak@87.08", "peak@166.12", "peak@206.08", "peak@179.07", "peak@107.06", "peak@242.08", "peak@162.04", "peak@150.01", "peak@160.11", "peak@110.10", "peak@115.08", "peak@86.10", 
"peak@138.09", "peak@223.08'..b': 1240, "peak@155.02": 1241, "peak@226.13": 1242, "peak@210.10": 1243, "peak@209.11": 1244, "peak@196.09": 1245, "peak@185.11": 1246, "peak@102.03": 1247, "peak@187.09": 1248, "peak@188.08": 1249, "peak@216.06": 1250, "peak@161.03": 1251, "peak@174.02": 1252, "peak@175.05": 1253, "peak@119.04": 1254, "peak@134.04": 1255, "peak@150.04": 1256, "peak@151.10": 1257, "peak@163.01": 1258, "peak@188.09": 1259, "peak@163.05": 1260, "peak@170.04": 1261, "peak@178.03": 1262, "peak@179.00": 1263, "peak@205.04": 1264, "peak@214.04": 1265, "peak@155.04": 1266, "peak@147.03": 1267, "peak@146.02": 1268, "peak@134.05": 1269, "peak@101.02": 1270, "peak@98.04": 1271, "peak@366.15": 1272, "peak@231.10": 1273, "peak@230.09": 1274, "peak@215.11": 1275, "peak@214.10": 1276, "peak@202.10": 1277, "peak@201.10": 1278, "peak@200.08": 1279, "peak@199.09": 1280, "peak@199.07": 1281, "peak@189.10": 1282, "peak@247.07": 1283, "peak@260.07": 1284, "peak@273.06": 1285, "peak@139.12": 1286, "peak@211.00": 1287, "peak@86.07": 1288, "peak@96.08": 1289, "peak@136.09": 1290, "peak@137.09": 1291, "peak@138.10": 1292, "peak@151.12": 1293, "peak@224.18": 1294, "peak@179.13": 1295, "peak@180.15": 1296, "peak@191.12": 1297, "peak@192.15": 1298, "peak@194.13": 1299, "peak@208.14": 1300, "peak@167.97": 1301, "peak@152.98": 1302, "peak@139.97": 1303, "peak@136.00": 1304, "peak@134.99": 1305, "peak@121.01": 1306, "peak@111.03": 1307, "peak@108.00": 1308, "peak@107.00": 1309, "peak@96.00": 1310, "peak@90.97": 1311, "peak@377.14": 1312, "peak@349.11": 1313, "peak@347.09": 1314, "peak@337.11": 1315, "peak@335.13": 1316, "peak@334.08": 1317, "peak@209.18": 1318, "peak@237.21": 1319, "peak@273.08": 1320, "peak@226.12": 1321, "peak@172.98": 1322, "peak@204.02": 1323, "peak@206.02": 1324, "peak@220.02": 1325, "peak@250.02": 1326, "peak@225.11": 1327, "peak@251.11": 1328, "peak@272.11": 1329, "peak@261.09": 1330, "peak@279.10": 1331, "peak@325.05": 1332, "peak@325.14": 1333, 
"peak@325.24": 1334, "peak@263.97": 1335, "peak@168.05": 1336, "peak@179.09": 1337, "peak@177.08": 1338, "peak@138.06": 1339, "peak@178.09": 1340, "peak@150.13": 1341, "peak@150.05": 1342, "peak@149.05": 1343, "peak@114.05": 1344, "peak@166.03": 1345, "peak@148.02": 1346, "peak@143.02": 1347, "peak@138.04": 1348, "peak@99.03": 1349, "peak@89.01": 1350, "peak@86.99": 1351, "peak@103.00": 1352, "peak@333.11": 1353, "peak@331.10": 1354, "peak@321.11": 1355, "peak@127.03": 1356, "peak@186.07": 1357, "peak@90.01": 1358, "peak@117.02": 1359, "peak@334.17": 1360, "peak@110.04": 1361, "peak@111.00": 1362, "peak@155.03": 1363, "peak@319.10": 1364, "peak@159.02": 1365, "peak@190.10": 1366, "peak@161.05": 1367, "peak@186.05": 1368, "peak@160.99": 1369, "peak@176.99": 1370, "peak@133.07": 1371, "peak@200.12": 1372, "peak@173.11": 1373, "peak@115.04": 1374, "peak@376.04": 1375, "peak@265.95": 1376, "peak@279.13": 1377, "peak@219.11": 1378, "peak@178.04": 1379, "peak@166.04": 1380, "peak@220.13": 1381, "peak@192.14": 1382, "peak@162.13": 1383, "peak@147.10": 1384, "peak@330.09": 1385, "peak@288.09": 1386, "peak@286.07": 1387, "peak@204.00": 1388, "peak@183.97": 1389, "peak@184.98": 1390, "peak@174.07": 1391, "peak@241.09": 1392, "peak@226.06": 1393, "peak@220.07": 1394, "peak@219.07": 1395, "peak@211.08": 1396, "peak@205.05": 1397, "peak@198.07": 1398, "peak@197.06": 1399, "peak@193.09": 1400, "peak@193.05": 1401, "peak@189.09": 1402, "peak@188.05": 1403, "peak@187.04": 1404, "peak@175.04": 1405, "peak@164.05": 1406, "peak@196.98": 1407, "peak@161.02": 1408, "peak@151.08": 1409, "peak@150.07": 1410, "peak@148.05": 1411, "peak@139.08": 1412, "peak@280.01": 1413, "peak@254.04": 1414, "peak@253.03": 1415, "peak@245.04": 1416, "peak@244.03": 1417, "peak@237.06": 1418, "peak@225.03": 1419, "peak@213.98": 1420, "peak@212.97": 1421, "peak@90.09": 1422}, "norms": null, "mapfile_path": null, "__numpys": [], "__scipys": [], "__ignoreds": [], "__recursive_saveloads": [], 
"__weights_format": "np.ndarray"}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 721a4e666191 test-data/model_vector_size_100.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model_vector_size_100.json Tue Jun 27 14:30:10 2023 +0000
[
b'@@ -0,0 +1,1 @@\n+{"vector_size": 100, "index_to_key": ["peak@95.05", "peak@91.05", "peak@92.05", "peak@115.05", "peak@125.02", "peak@89.04", "peak@173.51", "peak@93.06", "peak@105.07", "peak@105.05", "peak@155.06", "peak@111.04", "peak@120.04", "peak@110.07", "peak@106.07", "peak@118.07", "peak@127.02", "peak@119.06", "peak@119.09", "peak@110.05", "peak@137.02", "peak@149.02", "peak@142.07", "peak@94.07", "peak@107.05", "peak@103.05", "peak@138.08", "peak@96.06", "peak@85.05", "peak@96.04", "peak@158.98", "peak@117.06", "peak@129.07", "peak@139.01", "peak@143.06", "peak@129.01", "peak@120.08", "peak@146.06", "peak@128.06", "peak@119.05", "peak@123.00", "peak@104.05", "peak@131.06", "peak@123.04", "peak@105.04", "peak@163.03", "peak@109.07", "peak@138.99", "peak@117.07", "peak@144.06", "peak@170.10", "peak@110.06", "peak@95.09", "peak@145.06", "peak@85.08", "peak@147.08", "peak@137.06", "peak@151.03", "peak@133.06", "peak@97.04", "peak@165.05", "peak@114.07", "peak@175.03", "peak@132.08", "peak@139.03", "peak@107.09", "peak@99.00", "peak@141.01", "peak@140.03", "peak@113.02", "peak@158.05", "peak@113.08", "peak@109.08", "peak@122.06", "peak@130.07", "peak@168.09", "peak@156.07", "peak@141.07", "peak@167.07", "peak@153.07", "peak@136.04", "peak@165.07", "peak@98.06", "peak@161.06", "peak@164.03", "peak@93.07", "peak@128.05", "peak@157.08", "peak@116.06", "peak@172.99", "peak@86.04", "peak@116.03", "peak@124.08", "peak@118.05", "peak@134.06", "peak@116.05", "peak@150.02", "peak@174.97", "peak@135.08", "peak@184.12", "peak@128.08", "peak@129.05", "peak@100.05", "peak@99.07", "peak@152.06", "peak@148.04", "peak@134.10", "peak@152.07", "peak@121.04", "peak@176.04", "peak@130.03", "peak@94.04", "peak@147.94", "peak@182.08", "peak@121.07", "peak@134.07", "peak@136.08", "peak@186.08", "peak@168.07", "peak@122.10", "peak@123.06", "peak@124.06", "peak@125.01", "peak@91.03", "peak@102.04", "peak@183.06", "peak@91.04", "peak@125.06", "peak@90.03", "peak@130.04", 
"peak@126.01", "peak@165.10", "peak@93.03", "peak@108.08", "peak@139.07", "peak@148.08", "peak@125.08", "peak@191.06", "peak@122.07", "peak@113.04", "peak@108.06", "peak@191.07", "peak@183.08", "peak@133.09", "peak@159.09", "peak@158.08", "peak@144.08", "peak@114.09", "peak@143.07", "peak@161.10", "peak@150.09", "peak@108.04", "peak@170.07", "peak@132.07", "peak@153.08", "peak@107.07", "peak@163.08", "peak@145.05", "peak@145.10", "peak@131.09", "peak@133.10", "peak@209.06", "peak@120.06", "peak@178.05", "peak@145.03", "peak@172.97", "peak@186.97", "peak@157.09", "peak@164.07", "peak@168.02", "peak@164.04", "peak@133.05", "peak@132.04", "peak@177.05", "peak@102.05", "peak@166.07", "peak@167.09", "peak@190.04", "peak@155.09", "peak@189.05", "peak@142.08", "peak@185.07", "peak@147.07", "peak@160.08", "peak@141.06", "peak@135.04", "peak@131.07", "peak@95.06", "peak@149.06", "peak@159.04", "peak@162.07", "peak@208.96", "peak@130.08", "peak@155.07", "peak@106.03", "peak@98.10", "peak@123.02", "peak@219.03", "peak@109.03", "peak@172.96", "peak@139.05", "peak@124.98", "peak@113.00", "peak@142.99", "peak@147.04", "peak@169.10", "peak@99.04", "peak@146.07", "peak@125.05", "peak@176.03", "peak@141.02", "peak@180.10", "peak@102.06", "peak@136.01", "peak@226.17", "peak@215.03", "peak@147.06", "peak@184.99", "peak@99.08", "peak@164.11", "peak@198.08", "peak@208.13", "peak@165.06", "peak@151.11", "peak@192.10", "peak@308.00", "peak@223.01", "peak@156.09", "peak@109.10", "peak@155.00", "peak@132.96", "peak@125.00", "peak@208.10", "peak@171.03", "peak@159.07", "peak@158.04", "peak@159.97", "peak@127.10", "peak@163.04", "peak@142.12", "peak@216.03", "peak@171.04", "peak@148.09", "peak@158.07", "peak@180.03", "peak@144.07", "peak@148.11", "peak@121.09", "peak@127.99", "peak@140.05", "peak@217.02", "peak@87.08", "peak@166.12", "peak@206.08", "peak@179.07", "peak@107.06", "peak@242.08", "peak@162.04", "peak@150.01", "peak@160.11", "peak@110.10", "peak@115.08", "peak@86.10", 
"peak@138.09", "peak@223.08'..b': 1240, "peak@155.02": 1241, "peak@226.13": 1242, "peak@210.10": 1243, "peak@209.11": 1244, "peak@196.09": 1245, "peak@185.11": 1246, "peak@102.03": 1247, "peak@187.09": 1248, "peak@188.08": 1249, "peak@216.06": 1250, "peak@161.03": 1251, "peak@174.02": 1252, "peak@175.05": 1253, "peak@119.04": 1254, "peak@134.04": 1255, "peak@150.04": 1256, "peak@151.10": 1257, "peak@163.01": 1258, "peak@188.09": 1259, "peak@163.05": 1260, "peak@170.04": 1261, "peak@178.03": 1262, "peak@179.00": 1263, "peak@205.04": 1264, "peak@214.04": 1265, "peak@155.04": 1266, "peak@147.03": 1267, "peak@146.02": 1268, "peak@134.05": 1269, "peak@101.02": 1270, "peak@98.04": 1271, "peak@366.15": 1272, "peak@231.10": 1273, "peak@230.09": 1274, "peak@215.11": 1275, "peak@214.10": 1276, "peak@202.10": 1277, "peak@201.10": 1278, "peak@200.08": 1279, "peak@199.09": 1280, "peak@199.07": 1281, "peak@189.10": 1282, "peak@247.07": 1283, "peak@260.07": 1284, "peak@273.06": 1285, "peak@139.12": 1286, "peak@211.00": 1287, "peak@86.07": 1288, "peak@96.08": 1289, "peak@136.09": 1290, "peak@137.09": 1291, "peak@138.10": 1292, "peak@151.12": 1293, "peak@224.18": 1294, "peak@179.13": 1295, "peak@180.15": 1296, "peak@191.12": 1297, "peak@192.15": 1298, "peak@194.13": 1299, "peak@208.14": 1300, "peak@167.97": 1301, "peak@152.98": 1302, "peak@139.97": 1303, "peak@136.00": 1304, "peak@134.99": 1305, "peak@121.01": 1306, "peak@111.03": 1307, "peak@108.00": 1308, "peak@107.00": 1309, "peak@96.00": 1310, "peak@90.97": 1311, "peak@377.14": 1312, "peak@349.11": 1313, "peak@347.09": 1314, "peak@337.11": 1315, "peak@335.13": 1316, "peak@334.08": 1317, "peak@209.18": 1318, "peak@237.21": 1319, "peak@273.08": 1320, "peak@226.12": 1321, "peak@172.98": 1322, "peak@204.02": 1323, "peak@206.02": 1324, "peak@220.02": 1325, "peak@250.02": 1326, "peak@225.11": 1327, "peak@251.11": 1328, "peak@272.11": 1329, "peak@261.09": 1330, "peak@279.10": 1331, "peak@325.05": 1332, "peak@325.14": 1333, 
"peak@325.24": 1334, "peak@263.97": 1335, "peak@168.05": 1336, "peak@179.09": 1337, "peak@177.08": 1338, "peak@138.06": 1339, "peak@178.09": 1340, "peak@150.13": 1341, "peak@150.05": 1342, "peak@149.05": 1343, "peak@114.05": 1344, "peak@166.03": 1345, "peak@148.02": 1346, "peak@143.02": 1347, "peak@138.04": 1348, "peak@99.03": 1349, "peak@89.01": 1350, "peak@86.99": 1351, "peak@103.00": 1352, "peak@333.11": 1353, "peak@331.10": 1354, "peak@321.11": 1355, "peak@127.03": 1356, "peak@186.07": 1357, "peak@90.01": 1358, "peak@117.02": 1359, "peak@334.17": 1360, "peak@110.04": 1361, "peak@111.00": 1362, "peak@155.03": 1363, "peak@319.10": 1364, "peak@159.02": 1365, "peak@190.10": 1366, "peak@161.05": 1367, "peak@186.05": 1368, "peak@160.99": 1369, "peak@176.99": 1370, "peak@133.07": 1371, "peak@200.12": 1372, "peak@173.11": 1373, "peak@115.04": 1374, "peak@376.04": 1375, "peak@265.95": 1376, "peak@279.13": 1377, "peak@219.11": 1378, "peak@178.04": 1379, "peak@166.04": 1380, "peak@220.13": 1381, "peak@192.14": 1382, "peak@162.13": 1383, "peak@147.10": 1384, "peak@330.09": 1385, "peak@288.09": 1386, "peak@286.07": 1387, "peak@204.00": 1388, "peak@183.97": 1389, "peak@184.98": 1390, "peak@174.07": 1391, "peak@241.09": 1392, "peak@226.06": 1393, "peak@220.07": 1394, "peak@219.07": 1395, "peak@211.08": 1396, "peak@205.05": 1397, "peak@198.07": 1398, "peak@197.06": 1399, "peak@193.09": 1400, "peak@193.05": 1401, "peak@189.09": 1402, "peak@188.05": 1403, "peak@187.04": 1404, "peak@175.04": 1405, "peak@164.05": 1406, "peak@196.98": 1407, "peak@161.02": 1408, "peak@151.08": 1409, "peak@150.07": 1410, "peak@148.05": 1411, "peak@139.08": 1412, "peak@280.01": 1413, "peak@254.04": 1414, "peak@253.03": 1415, "peak@245.04": 1416, "peak@244.03": 1417, "peak@237.06": 1418, "peak@225.03": 1419, "peak@213.98": 1420, "peak@212.97": 1421, "peak@90.09": 1422}, "norms": null, "mapfile_path": null, "__numpys": [], "__scipys": [], "__ignoreds": [], "__recursive_saveloads": [], 
"__weights_format": "np.ndarray"}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 721a4e666191 test-data/ri_match_60.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ri_match_60.json Tue Jun 27 14:30:10 2023 +0000
[
b'@@ -0,0 +1,1 @@\n+{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "YNPNZTXNASCQKK-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"74.01508": "Theoretical m/z 74.01565, Mass diff 0 (0 ppm), Formula C6H2", "75.02295": "Theoretical m/z 75.023475, Mass diff 0 (0 ppm), Formula C6H3", "76.03075": "Theoretical m/z 76.0313, Mass diff 0 (0 ppm), Formula C6H4", "87.02295": "Theoretical m/z 87.023475, Mass diff 0 (0 ppm), Formula C7H3", "88.03076": "Theoretical m/z 88.0313, Mass diff 0 (0 ppm), Formula C7H4", "89.03857": "Theoretical m/z 89.039125, Mass diff 0 (0 ppm), Formula C7H5", "151.05415": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "175.05423": "Theoretical m/z 175.054775, Mass diff 0 (0 ppm), Formula C14H7", "176.062": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06982": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.0775": "Theoretical m/z 178.077698, Mass diff 0 (1.11 ppm), SMILES C=1C=CC2=C(C1)C=CC=3C=CC=CC32, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "19", "compound_name": "Phenanthrene", "retention_time": null, "retention_index": 1832.9, "precursor_mz": 178.0775, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.01508, 137808.0], [75.02295, 278714.0], [76.03075, 608417.0], [87.02295, 304266.0], [88.03076, 497050.0], [89.03857, 441168.0], [98.01511, 150478.0], [150.04633, 868927.0], [151.05415, 546351.0], [152.0619, 2275502.0], [153.06528, 276320.0], [169.06468, 272559.0], [174.04636, 365846.0], [175.05423, 272039.0], [176.062, 3370523.0], [177.06982, 1751846.0], [178.0775, 13724432.0], [179.08078, 2250119.0], [180.08412, 
138203.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "MWPLVEDNUUSJAV-UHFFFAOYSA-N", "smiles": "C1=CC2=CC3=C(C=CC=C3)C=C2C=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"74.0151": "Theoretical m/z 74.01565, Mass diff 0 (0 ppm), Formula C6H2", "75.02296": "Theoretical m/z 75.023475, Mass diff 0 (0 ppm), Formula C6H3", "76.03077": "Theoretical m/z 76.0313, Mass diff 0 (0 ppm), Formula C6H4", "87.02297": "Theoretical m/z 87.023475, Mass diff 0 (0 ppm), Formula C7H3", "88.03078": "Theoretical m/z 88.0313, Mass diff 0 (0 ppm), Formula C7H4", "89.0386": "Theoretical m/z 89.039125, Mass diff 0 (0 ppm), Formula C7H5", "91.05425": "Theoretical m/z 91.054775, Mass diff 0 (0 ppm), Formula C7H7", "151.05415": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "175.05423": "Theoretical m/z 175.054775, Mass diff 0 (0 ppm), Formula C14H7", "176.06204": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06984": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.07754": "Theoretical m/z 178.077698, Mass diff 0 (0.89 ppm), SMILES C=1C=CC=2C=C3C=CC=CC3=CC2C1, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "21", "compound_name": "Anthracene", "retention_time": null, "retention_index": 1844.4, "precursor_mz": 178.07754, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.0151, 117371.0], [75.02296, 245305.0], [76.03077, 632686.0], [87.02297, 236214.0], [88.03078, 347832.0], [89.0386, 507288.0], [91.05425, 130861.0], [126.04636, 128356.0], [149.04478, 126331.0], [150.04637, 641829.0], [151.05415, 403195.0], [152.06195, 1369833.0], [153.06534, 175290.0], [169.06471, 228827.0], [174.04639, 255716.0], [175.05423, 198784.0], [176.06204, 2563053.0], [177.06984, 1182037.0], [178.07754, 
11002398.0], [179.080'..b'320.0], [224.062, 181393.0], [226.07771, 1503774.0], [227.08588, 1198323.0], [228.09344, 2885510.0], [229.10123, 1791323.0], [230.10886, 17890056.0], [231.11214, 3536976.0], [232.11542, 320231.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10S", "inchikey": "YEUHHUCOSQOCIX-UHFFFAOYSA-N", "smiles": "S1C2=C(C=CC=C2)C2=C1C1=CC=CC=C1C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"91.05425": "Theoretical m/z 91.054775, Mass diff 0 (0 ppm), Formula C7H7", "163.05408": "Theoretical m/z 163.054775, Mass diff 0 (0 ppm), Formula C13H7", "187.05406": "Theoretical m/z 187.054775, Mass diff 0 (0 ppm), Formula C15H7", "189.06969": "Theoretical m/z 189.070425, Mass diff 0 (0 ppm), Formula C15H9", "201.06973": "Theoretical m/z 201.070425, Mass diff 0 (0 ppm), Formula C16H9", "233.04245": "Theoretical m/z 233.042496, Mass diff -0.001 (0 ppm), Formula C16H9S", "234.04965": "Theoretical m/z 234.049775, Mass diff 0 (0.53 ppm), SMILES S1C=2C=CC=CC2C=3C=CC=4C=CC=CC4C13, Annotation [C16H10S]+, Rule of HR False"}, "num_peaks": "23", "compound_name": "Benzo[b]naphtho[2,1-d]thiophene", "retention_time": null, "retention_index": 2419.3, "precursor_mz": 234.04965, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[91.05425, 49130.0], [93.52686, 61653.0], [94.53469, 76638.0], [104.01679, 126871.0], [116.01675, 112547.0], [117.02459, 99108.0], [162.04623, 54491.0], [163.05408, 136418.0], [164.06198, 56321.0], [187.05406, 273349.0], [188.06189, 200750.0], [189.06969, 453336.0], [190.07306, 77884.0], [200.06189, 135620.0], [201.06973, 74692.0], [202.07758, 425058.0], [203.08086, 72058.0], [226.07762, 63460.0], [232.03409, 769638.0], [233.04245, 382457.0], [234.04965, 4404102.0], [235.05304, 725574.0], [236.04547, 251450.0]]}, {"scannumber": "-1", 
"ionmode": "positive", "spectrumtype": "Centroid", "formula": "C17H12", "inchikey": "HAPOJKSPCGLOOD-UHFFFAOYSA-N", "smiles": "C1C2=CC=CC=C2C2=C1C=C1C=CC=CC1=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": {"163.05414": "Theoretical m/z 163.054775, Mass diff 0 (0 ppm), Formula C13H7", "187.05412": "Theoretical m/z 187.054775, Mass diff 0 (0 ppm), Formula C15H7", "189.06975": "Theoretical m/z 189.070425, Mass diff 0 (0 ppm), Formula C15H9", "211.05426": "Theoretical m/z 211.054775, Mass diff 0 (0 ppm), Formula C17H7", "213.0699": "Theoretical m/z 213.070425, Mass diff 0 (0 ppm), Formula C17H9", "215.0855": "Theoretical m/z 215.086075, Mass diff 0 (0 ppm), Formula C17H11", "216.09326": "Theoretical m/z 216.093354, Mass diff 0 (0.44 ppm), SMILES C=1C=CC=2C=C3C(=CC2C1)C=4C=CC=CC4C3, Annotation [C17H12]+, Rule of HR False"}, "num_peaks": "21", "compound_name": "2,3-Benzofluorene", "retention_time": null, "retention_index": 2257.5, "precursor_mz": 216.09326, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[93.52689, 122035.0], [94.53471, 241743.0], [95.03638, 42130.0], [105.52688, 47526.0], [106.53471, 171653.0], [107.03639, 60004.0], [107.5425, 180288.0], [108.03385, 97471.0], [163.05414, 68194.0], [187.05412, 167183.0], [188.06201, 86262.0], [189.06975, 239421.0], [190.07321, 37778.0], [211.05426, 117044.0], [212.06215, 60813.0], [213.0699, 808499.0], [214.07317, 202222.0], [215.0855, 3634570.0], [216.09326, 2535030.0], [217.09671, 444170.0], [218.09999, 37976.0]]}], "n_row": 7, "n_col": 7, "row": [0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6], "col": [0, 1, 0, 1, 2, 3, 2, 3, 4, 3, 4, 6, 5, 4, 6], "data": [[true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true], [true]], "dtype": [["MetadataMatch_retention_index_difference_60.0_f0", 
"|b1"]]}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 721a4e666191 test-data/s2v_scores_test1_out.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/s2v_scores_test1_out.json Tue Jun 27 14:30:10 2023 +0000
[
b'@@ -0,0 +1,1 @@\n+{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C20H12", "inchikey": "CSHWQDPOILHKBI-UHFFFAOYSA-N", "smiles": "C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Perylene", "retention_time": null, "retention_index": 2886.9, "precursor_mz": 252.09323, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "251.08595400000002", "peak_comments": {"252.09323": "Theoretical m/z 252.093354, Mass diff 0 (0.49 ppm), SMILES C1=CC=2C=CC=C3C4=CC=CC5=CC=CC(C(=C1)C23)=C54, Annotation [C20H12]+, Rule of HR False"}, "num_peaks": "3", "peaks_json": [[250.07765, 0.3282529462971431], [252.09323, 1.0], [253.09656, 0.20573802940517583]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "YNPNZTXNASCQKK-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Phenanthrene", "retention_time": null, "retention_index": 1832.9, "precursor_mz": 178.0775, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "177.070224", "peak_comments": {"176.062": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06982": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.0775": "Theoretical m/z 178.077698, Mass diff 0 (1.11 ppm), SMILES C=1C=CC2=C(C1)C=CC=3C=CC=CC32, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "5", "peaks_json": [[152.0619, 0.1657993569424221], [176.062, 0.24558560966311757], [177.06982, 0.12764433529926775], [178.0775, 
1.0], [179.08078, 0.16394988149600653]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "MWPLVEDNUUSJAV-UHFFFAOYSA-N", "smiles": "C1=CC2=CC3=C(C=CC=C3)C=C2C=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Anthracene", "retention_time": null, "retention_index": 1844.4, "precursor_mz": 178.07754, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "177.070264", "peak_comments": {"176.06204": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06984": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.07754": "Theoretical m/z 178.077698, Mass diff 0 (0.89 ppm), SMILES C=1C=CC=2C=C3C=CC=CC3=CC2C1, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "5", "peaks_json": [[152.06195, 0.12450313104470498], [176.06204, 0.23295403420236208], [177.06984, 0.1074344883724439], [178.07754, 1.0], [179.08081, 0.1616741186784917]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C12H10", "inchikey": "CWRYPZZKDGJXCA-UHFFFAOYSA-N", "smiles": "C1CC2=C3C1=CC=CC3=CC=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Acenaphthene", "retention_time": null, "retention_index": 1528.3, "precursor_mz": 154.07741, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "153.070134", "peak_comments": {"151.05418": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "153.06969": "Theoretical m/z 153.070425, Mass diff 0 (0 ppm), Formula C12H9", "154.07741": "Theoretical m/z 154.077698, Mass diff 0 (1.87 ppm), SMILES C=1C=C2C=CC=C3C2=C(C1)CC3, Annotation [C12H10]+, Rule of HR False"}, 
"num_peaks":'..b'608915, 0.4261904641405732], [180.961381219577, 0.19044664411406823], [195.995454001979, 0.4349696378594456], [196.992752093539, 0.3065603768824057], [197.974508055988, 0.395078144825856], [198.95441899853, 0.20984023997622794], [212.970014961884, 0.312364140356306], [213.006147303489, 0.3451086913299261], [214.98560992714, 0.5058106319186024], [220.957069324217, 0.1741467403445935], [222.936453968368, 0.1925777417566011], [229.019119616188, 1.0], [230.00074243328, 0.4313373151032761], [236.98832473252, 0.17144823687024496], [240.946899738586, 0.37016492220221137], [253.982966844011, 0.2619026170220304], [269.977845781938, 0.369206087321791], [270.975426915952, 0.26702156040941716], [270.993770069105, 0.5558156567281477], [297.079403794841, 0.3276080388276794], [298.934550867121, 0.27505460413538213], [328.980928284848, 0.30053783536988743], [330.960641905517, 0.36106011190503906], [372.953096157169, 0.24565144306023193]]}, {"ionmode": "negative", "spectrumtype": "Centroid", "compound_name": "C018", "retention_time": 687.52, "retention_index": 4133.690476, "charge": -1, "num_peaks": "9", "peaks_json": [[209.011554808631, 0.6525630494040975], [209.029113642814, 0.10839584784422365], [210.990970491228, 0.15000447517519525], [226.041539764125, 0.3130785916148705], [227.039590062704, 0.17080278851135594], [265.020169474237, 0.2407632085362116], [281.051364215883, 1.0], [282.049803555434, 0.21798058347517868], [285.009414657485, 0.12222513080126347]]}, {"ionmode": "negative", "spectrumtype": "Centroid", "compound_name": "C012", "retention_time": 687.83, "retention_index": 4134.613095, "charge": -1, "num_peaks": "27", "peaks_json": [[121.046741626167, 0.12537265500763864], [192.997735629267, 0.1940732067111916], [205.016711322548, 0.12056941717717026], [209.098935410027, 0.12559326156418593], [213.05754958053, 0.27142481482801556], [253.016653119059, 0.8938154160284864], [271.027137265637, 0.1375157398831428], [284.047997947258, 0.1709644868199992], 
[325.985953604199, 0.17320279707305308], [327.03525664153, 1.0], [328.035343658, 0.26668492712822667], [345.046169172075, 0.17928530473894783], [359.09769204202, 0.2008606829314974], [360.028136457829, 0.21741111817985187], [387.002413998209, 0.2632563912022333], [387.071825807939, 0.3071277807533759], [402.055174276849, 0.2538777644377674], [461.090256355859, 0.3629780005380489], [462.091555156095, 0.14179921776218857], [475.072310244956, 0.27595658697326014], [476.143434019655, 0.2650001066007337], [477.140480798609, 0.10059042565917092], [490.12632015072, 0.3700192231231476], [491.122346777972, 0.20934752539009766], [549.162553092955, 0.5034364902937184], [563.144239233773, 0.2590935918637112], [624.183209223215, 0.1451500409395642]]}, {"ionmode": "negative", "spectrumtype": "Centroid", "compound_name": "C020", "retention_time": 687.99, "retention_index": 4135.089286, "charge": -1, "num_peaks": "24", "peaks_json": [[194.053642118165, 0.17149988589657134], [212.061782732582, 0.3408023706085241], [267.034768010352, 0.14461973681405008], [267.068824022318, 0.9059077886912253], [310.970568727813, 0.10207376981761378], [325.055731606087, 0.2492913881575955], [327.965549188207, 0.17771113517875153], [339.03821058645, 0.2164141235131004], [342.996634492902, 0.12864401601229136], [345.115297423962, 0.12858631941521836], [358.067243216398, 0.11516054657713713], [361.025211906011, 0.21277786650130676], [388.003000430725, 0.1117630757647746], [388.073272089579, 0.1379380986368877], [399.005054559559, 0.13523814605127854], [401.984326631505, 0.18236859257167579], [402.98179623463, 0.11632404563967355], [416.036473280551, 0.23580021278191537], [417.033665098569, 0.13482894754843827], [430.088321970134, 0.3515024935084798], [431.085366629672, 0.22490708219361874], [475.14184210128, 1.0], [565.143723544965, 0.11382519851041557], [625.181479977537, 0.1264407374154073]]}], "n_row": 133, "n_col": 51, "row": [], "col": [], "data": [], "dtype": [["Spec2Vec_0.0_1.0", "<f8"]]}\n\\ No 
newline at end of file\n'
b
diff -r 000000000000 -r 721a4e666191 test-data/s2v_scores_test2_out.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/s2v_scores_test2_out.json Tue Jun 27 14:30:10 2023 +0000
[
b'@@ -0,0 +1,1 @@\n+{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "YNPNZTXNASCQKK-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "19", "compound_name": "Phenanthrene", "retention_time": null, "retention_index": 1832.9, "precursor_mz": 178.0775, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.01508, 137808.0], [75.02295, 278714.0], [76.03075, 608417.0], [87.02295, 304266.0], [88.03076, 497050.0], [89.03857, 441168.0], [98.01511, 150478.0], [150.04633, 868927.0], [151.05415, 546351.0], [152.0619, 2275502.0], [153.06528, 276320.0], [169.06468, 272559.0], [174.04636, 365846.0], [175.05423, 272039.0], [176.062, 3370523.0], [177.06982, 1751846.0], [178.0775, 13724432.0], [179.08078, 2250119.0], [180.08412, 138203.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "MWPLVEDNUUSJAV-UHFFFAOYSA-N", "smiles": "C1=CC2=CC3=C(C=CC=C3)C=C2C=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "21", "compound_name": "Anthracene", "retention_time": null, "retention_index": 1844.4, "precursor_mz": 178.07754, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[74.0151, 117371.0], [75.02296, 245305.0], [76.03077, 632686.0], [87.02297, 236214.0], [88.03078, 347832.0], [89.0386, 507288.0], [91.05425, 130861.0], [126.04636, 128356.0], [149.04478, 126331.0], [150.04637, 641829.0], [151.05415, 403195.0], [152.06195, 1369833.0], [153.06534, 175290.0], [169.06471, 228827.0], 
[174.04639, 255716.0], [175.05423, 198784.0], [176.06204, 2563053.0], [177.06984, 1182037.0], [178.07754, 11002398.0], [179.08081, 1778803.0], [180.08418, 132922.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10", "inchikey": "GVEPBJHOBDJJJI-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C3C2=CC=CC3=CC=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "17", "compound_name": "Fluoranthene", "retention_time": null, "retention_index": 2102.7, "precursor_mz": 202.07756, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[75.02299, 112456.0], [87.02298, 183640.0], [88.03079, 367434.0], [99.02296, 124952.0], [100.03078, 376079.0], [101.03863, 381288.0], [150.04642, 86059.0], [174.04634, 246963.0], [176.06194, 141676.0], [198.0464, 244370.0], [199.05429, 285767.0], [200.06207, 1958890.0], [201.06982, 1103710.0], [202.07756, 8104188.0], [203.08084, 1377015.0], [204.08421, 98067.0], [219.08043, 186623.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10", "inchikey": "BBEAQIROQSPTKN-UHFFFAOYSA-N", "smiles": "C1=CC2=C3C(C=CC4=CC=CC(C=C2)=C34)=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "16", "compound_name": "Pyrene", "retention_time": null, "retention_index": 2154.5, "precursor_mz": 202.07759, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[87.02299, 147113.0], [88.0308, 305149.0], [99.02298, 137042.0], [100.0308, 508914.0], [101.03864, 472094.0], [101.54032, 82430.0], [150.04637, 80741.0], [174.04631, 212706.0], [198.04643, 262925.0], [199.05429, 295144.0], [200.06209, 1960712.0], [201.06982, 
1270466.0], [202.'..b'], [203.08086, 1335645.0], [204.0843, 121774.0], [219.08044, 182944.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C18H14", "inchikey": "XJKSTNDFUHDPQJ-UHFFFAOYSA-N", "smiles": "C1=CC=C(C=C1)C2=CC=C(C=C2)C3=CC=CC=C3", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "25", "compound_name": "para-Terphenyl", "retention_time": null, "retention_index": 2207.5, "precursor_mz": 230.10886, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[88.03077, 233962.0], [101.03861, 353544.0], [113.03854, 437845.0], [115.05423, 690291.0], [128.062, 293046.0], [151.05412, 266397.0], [152.06195, 961184.0], [153.06976, 409346.0], [176.06192, 232793.0], [189.06972, 408734.0], [200.0619, 342984.0], [201.06986, 238974.0], [202.07758, 1292332.0], [203.08093, 374439.0], [213.06973, 256631.0], [215.08548, 1220866.0], [216.08881, 217320.0], [224.062, 181393.0], [226.07771, 1503774.0], [227.08588, 1198323.0], [228.09344, 2885510.0], [229.10123, 1791323.0], [230.10886, 17890056.0], [231.11214, 3536976.0], [232.11542, 320231.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C16H10S", "inchikey": "YEUHHUCOSQOCIX-UHFFFAOYSA-N", "smiles": "S1C2=C(C=CC=C2)C2=C1C1=CC=CC=C1C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "23", "compound_name": "Benzo[b]naphtho[2,1-d]thiophene", "retention_time": null, "retention_index": 2419.3, "precursor_mz": 234.04965, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[91.05425, 49130.0], [93.52686, 61653.0], [94.53469, 76638.0], [104.01679, 126871.0], [116.01675, 112547.0], 
[117.02459, 99108.0], [162.04623, 54491.0], [163.05408, 136418.0], [164.06198, 56321.0], [187.05406, 273349.0], [188.06189, 200750.0], [189.06969, 453336.0], [190.07306, 77884.0], [200.06189, 135620.0], [201.06973, 74692.0], [202.07758, 425058.0], [203.08086, 72058.0], [226.07762, 63460.0], [232.03409, 769638.0], [233.04245, 382457.0], [234.04965, 4404102.0], [235.05304, 725574.0], [236.04547, 251450.0]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C17H12", "inchikey": "HAPOJKSPCGLOOD-UHFFFAOYSA-N", "smiles": "C1C2=CC=CC=C2C2=C1C=C1C=CC=CC1=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "peak_comments": null, "num_peaks": "21", "compound_name": "2,3-Benzofluorene", "retention_time": null, "retention_index": 2257.5, "precursor_mz": 216.09326, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "peaks_json": [[93.52689, 122035.0], [94.53471, 241743.0], [95.03638, 42130.0], [105.52688, 47526.0], [106.53471, 171653.0], [107.03639, 60004.0], [107.5425, 180288.0], [108.03385, 97471.0], [163.05414, 68194.0], [187.05412, 167183.0], [188.06201, 86262.0], [189.06975, 239421.0], [190.07321, 37778.0], [211.05426, 117044.0], [212.06215, 60813.0], [213.0699, 808499.0], [214.07317, 202222.0], [215.0855, 3634570.0], [216.09326, 2535030.0], [217.09671, 444170.0], [218.09999, 37976.0]]}], "n_row": 7, "n_col": 7, "row": [0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6], "col": [0, 1, 0, 1, 2, 3, 2, 3, 4, 3, 4, 6, 5, 4, 6], "data": [[true, 1.0], [true, 0.9999951682708088], [true, 0.9999951682708088], [true, 1.0], [true, 1.0], [true, 1.0], [true, 1.0], [true, 1.0], [true, 0.9998494302807368], [true, 0.9998494302807368], [true, 1.0], [true, 0.9998572581666383], [true, 1.0], [true, 0.9998572581666383], [true, 1.0]], "dtype": [["MetadataMatch_retention_index_difference_60.0_f0", "|b1"], ["Spec2Vec_0.0_1.0", 
"<f8"]]}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 721a4e666191 test-data/weights_100.binary
b
Binary file test-data/weights_100.binary has changed