Repository 'matchms_split'
hg clone https://toolshed.g2.bx.psu.edu/repos/recetox/matchms_split

Changeset 14:114617e6ad33 (2024-02-05)
Previous changeset 13:fc1bc38ede0b (2024-01-15) Next changeset 15:14a792c45c0b (2024-02-22)
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit da193865f41a3a840ecc4ba0afab1d358554998a
modified:
matchms_split.py
matchms_split.xml
test-data/convert/metadata.csv
test-data/split/chunk-size/chunk_0.msp
test-data/split/chunk-size/chunk_1.msp
test-data/split/chunk-size/chunk_2.msp
test-data/split/num-chunks/chunk_0.msp
test-data/split/num-chunks/chunk_1.msp
test-data/subsetting/identifier.csv
test-data/subsetting/subsetting_output.msp
test-data/subsetting/subsetting_output2.msp
added:
test-data/add_key/add_key_test2.msp
test-data/add_key/add_key_test2_out.msp
test-data/add_key/out_matchms_add_key.msp
test-data/split/one-per-file/0.msp
test-data/split/one-per-file/1.msp
test-data/split/one-per-file/2.msp
test-data/split/one-per-file/3.msp
test-data/split/one-per-file/4.msp
test-data/split/one-per-file/5.msp
test-data/split/one-per-file/6.msp
test-data/split/one-per-file/7.msp
test-data/split/one-per-file/8.msp
test-data/split/one-per-file/9.msp
removed:
test-data/out_matchms_add_key.msp
test-data/split/one-per-file/1NITROPYRENE.msp
test-data/split/one-per-file/23DICHLOROPHENOL.msp
test-data/split/one-per-file/245TRICHLOROPHENOL.msp
test-data/split/one-per-file/246TRICHLOROPHENOL.msp
test-data/split/one-per-file/24DICHLOROPHENOL.msp
test-data/split/one-per-file/24DINITROPHENOL.msp
test-data/split/one-per-file/25DICHLOROPHENOL.msp
test-data/split/one-per-file/26DICHLOROPHENOL.msp
test-data/split/one-per-file/34DICHLOROPHENOL.msp
test-data/split/one-per-file/35DICHLOROPHENOL.msp
b
diff -r fc1bc38ede0b -r 114617e6ad33 matchms_split.py
--- a/matchms_split.py Mon Jan 15 12:28:02 2024 +0000
+++ b/matchms_split.py Mon Feb 05 10:35:49 2024 +0000
[
@@ -1,22 +1,13 @@
 import argparse
 import itertools
 import os
-from typing import List
 
+import matchms
 from matchms.exporting import save_as_msp
 from matchms.importing import load_from_msp
 
 
-def get_spectra_names(spectra: list) -> List[str]:
-    """Read the keyword 'compound_name' from a spectra.
-
-    Args:
-        spectra (list): List of individual spectra.
-
-    Returns:
-        List[str]: List with 'compoud_name' of individual spectra.
-    """
-    return [x.get("compound_name") for x in spectra]
+matchms.Metadata.set_key_replacements({})
 
 
 def make_outdir(outdir: str):
@@ -35,23 +26,8 @@
         spectra (List[Spectrum]): Spectra to write to file
         outdir   (str): Path to destination directory.
     """
-    names = get_spectra_names(spectra)
     for i in range(len(spectra)):
-        outpath = assemble_outpath(names[i], outdir)
-        save_as_msp(spectra[i], outpath)
-
-
-def assemble_outpath(name, outdir):
-    """Filter special chracteres from name.
-
-    Args:
-        name   (str): Name to be filetered.
-        outdir (str): Path to destination directory.
-    """
-    filename = ''.join(filter(str.isalnum, name))
-    outfile = str(filename) + ".msp"
-    outpath = os.path.join(outdir, outfile)
-    return outpath
+        save_as_msp(spectra[i], os.path.join(outdir, f"{i}.msp"))
 
 
 def split_round_robin(iterable, num_chunks):
@@ -76,7 +52,7 @@
 
 
 if __name__ == "__main__":
-    spectra = load_from_msp(filename, metadata_harmonization=True)
+    spectra = load_from_msp(filename, metadata_harmonization=False)
     make_outdir(outdir)
 
     if method == "one-per-file":
b
diff -r fc1bc38ede0b -r 114617e6ad33 matchms_split.xml
--- a/matchms_split.xml Mon Jan 15 12:28:02 2024 +0000
+++ b/matchms_split.xml Mon Feb 05 10:35:49 2024 +0000
b
@@ -1,4 +1,4 @@
-<tool id="matchms_split" name="matchms split library" version="@TOOL_VERSION@+galaxy0" profile="21.09">
+<tool id="matchms_split" name="matchms split library" version="@TOOL_VERSION@+galaxy1" profile="21.09">
     <description>split a large library into subsets</description>
     <macros>
         <import>macros.xml</import>
@@ -53,16 +53,16 @@
             <param name="msp_input" value="split/sample_input.msp"  />
             <param name="split_type" value="one-per-file" />
             <output_collection name="sample" type="list">
-                <element name="1NITROPYRENE"         file="split/one-per-file/1NITROPYRENE.msp"         ftype="msp" compare="diff"/>
-                <element name="23DICHLOROPHENOL"     file="split/one-per-file/23DICHLOROPHENOL.msp"     ftype="msp" compare="diff"/>
-                <element name="245TRICHLOROPHENOL"   file="split/one-per-file/245TRICHLOROPHENOL.msp"   ftype="msp" compare="diff"/>
-                <element name="246TRICHLOROPHENOL"   file="split/one-per-file/246TRICHLOROPHENOL.msp"   ftype="msp" compare="diff"/>
-                <element name="24DICHLOROPHENOL"     file="split/one-per-file/24DICHLOROPHENOL.msp"     ftype="msp" compare="diff"/>
-                <element name="24DINITROPHENOL"      file="split/one-per-file/24DINITROPHENOL.msp"      ftype="msp" compare="diff"/>
-                <element name="25DICHLOROPHENOL"     file="split/one-per-file/25DICHLOROPHENOL.msp"     ftype="msp" compare="diff"/>
-                <element name="26DICHLOROPHENOL"     file="split/one-per-file/26DICHLOROPHENOL.msp"     ftype="msp" compare="diff"/>
-                <element name="34DICHLOROPHENOL"     file="split/one-per-file/34DICHLOROPHENOL.msp"     ftype="msp" compare="diff"/>
-                <element name="35DICHLOROPHENOL"     file="split/one-per-file/35DICHLOROPHENOL.msp"     ftype="msp" compare="diff"/>
+                <element name="0"         file="split/one-per-file/0.msp"         ftype="msp" compare="diff"/>
+                <element name="1"     file="split/one-per-file/1.msp"     ftype="msp" compare="diff"/>
+                <element name="2"   file="split/one-per-file/2.msp"   ftype="msp" compare="diff"/>
+                <element name="3"   file="split/one-per-file/3.msp"   ftype="msp" compare="diff"/>
+                <element name="4"     file="split/one-per-file/4.msp"     ftype="msp" compare="diff"/>
+                <element name="5"      file="split/one-per-file/5.msp"      ftype="msp" compare="diff"/>
+                <element name="6"     file="split/one-per-file/6.msp"     ftype="msp" compare="diff"/>
+                <element name="7"     file="split/one-per-file/7.msp"     ftype="msp" compare="diff"/>
+                <element name="8"     file="split/one-per-file/8.msp"     ftype="msp" compare="diff"/>
+                <element name="9"     file="split/one-per-file/9.msp"     ftype="msp" compare="diff"/>
             </output_collection>
         </test>
         <test>
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/add_key/add_key_test2.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/add_key/add_key_test2.msp Mon Feb 05 10:35:49 2024 +0000
b
@@ -0,0 +1,46 @@
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C20H12
+INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
+SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Perylene
+RETENTION_TIME: None
+RETENTION_INDEX: 2886.9
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 251.08595400000002
+NUM PEAKS: 3
+250.07765   0.3282529462971431
+252.09323   1.0
+253.09656   0.20573802940517583
+
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C14H10
+INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
+SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Phenanthrene
+RETENTION_TIME: None
+RETENTION_INDEX: 1832.9
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 177.070224
+NUM PEAKS: 5
+152.0619    0.1657993569424221
+176.062     0.24558560966311757
+177.06982   0.12764433529926775
+178.0775    1.0
+179.08078   0.16394988149600653
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/add_key/add_key_test2_out.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/add_key/add_key_test2_out.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,48 @@
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C20H12
+INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
+SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Perylene
+RETENTION_TIME: None
+RETENTION_INDEX: 2886.9
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 251.08595400000002
+ADDUCT: [M]+
+NUM PEAKS: 3
+250.07765   0.3282529462971431
+252.09323   1.0
+253.09656   0.20573802940517583
+
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C14H10
+INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
+SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Phenanthrene
+RETENTION_TIME: None
+RETENTION_INDEX: 1832.9
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 177.070224
+ADDUCT: [M]+
+NUM PEAKS: 5
+152.0619    0.1657993569424221
+176.062     0.24558560966311757
+177.06982   0.12764433529926775
+178.0775    1.0
+179.08078   0.16394988149600653
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/add_key/out_matchms_add_key.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/add_key/out_matchms_add_key.msp Mon Feb 05 10:35:49 2024 +0000
b
@@ -0,0 +1,199 @@
+NAME: C001
+IONMODE: Negative
+RETENTIONTIME: 38.74
+RETENTIONINDEX: -1
+SPECTRUMTYPE: Centroid
+TOOL_USED: matchms
+NUM PEAKS: 57
+138.9121    10186226.0
+148.9337    1008656.0
+175.0641    26780143.0
+186.1095    2675456.0
+196.8658    21390430.0
+198.8647    21688594.0
+200.8848    7742528.0
+206.9034    26130980.0
+216.9205    32607700.0
+234.0134    2550129.0
+254.8252    23747536.0
+256.8215    31377637.0
+258.8237    15532799.0
+266.8652    9805546.0
+268.8537    3090354.0
+306.9914    3169316.0
+312.7841    10051801.0
+316.7777    10734168.0
+322.8157    6317648.0
+324.9549    8619910.0
+334.849     4178412.0
+342.8093    3285552.0
+349.9455    2050695.0
+350.9875    6150799.0
+351.941     1965882.0
+366.8281    3253770.0
+370.7418    9765463.0
+372.7383    19374863.0
+382.8218    12815572.0
+384.8177    8311500.0
+392.7685    10913351.0
+413.2664    3965867.0
+426.7772    5431633.0
+428.7834    8554675.0
+434.7287    9943329.0
+436.8161    3705247.0
+440.7322    10603010.0
+442.7401    8271752.0
+450.7016    8762673.0
+460.7076    4528973.0
+462.7862    2123666.0
+484.7242    4273989.0
+486.7743    4886062.0
+488.6825    12267966.0
+492.744     7662344.0
+494.8953    7188793.0
+498.8794    6811405.0
+500.8484    6520691.0
+502.7832    3567833.0
+510.763     4989757.0
+518.7415    4243468.0
+546.6093    7177067.0
+550.6949    6104789.0
+566.5977    5171811.0
+612.6927    2005587.0
+676.6436    1982714.0
+800.4451    2792137.0
+
+NAME: C002
+IONMODE: Negative
+RETENTIONTIME: 520.25
+RETENTIONINDEX: 1234.5
+SPECTRUMTYPE: Centroid
+TOOL_USED: matchms
+NUM PEAKS: 35
+131.1733    1971789.0
+267.2688    6103973.0
+279.0196    1946255.0
+289.6491    46498377.0
+301.1565    15185412.0
+309.1649    18045974.0
+310.1623    295359836.0
+311.1658    13124727.0
+312.0296    38757284.0
+330.6757    12666597.0
+525.375     1073323842.0
+526.3783    181668883.0
+527.3812    23642795.0
+551.3321    111616808.0
+552.3348    28340614.0
+553.3314    2609936.0
+562.3269    7538206.0
+578.2905    7578406.0
+619.3008    4742103.0
+624.296     11790213.0
+813.5403    25060147.0
+814.5336    5865975.0
+955.1171    2322927.0
+1047.7378   150394804.0
+1048.7399   90978863.0
+1049.7432   29946438.0
+1050.7453   6807767.0
+1069.7158   5074652.0
+1074.1979   3402288.0
+1075.1968   33352763.0
+1076.2004   10417953.0
+1101.6535   2023916.0
+1206.3127   3738816.0
+1216.8041   4439324.0
+1217.807    3565334.0
+
+NAME: C003
+IONMODE: Negative
+RETENTIONTIME: 483.67
+SPECTRUMTYPE: Centroid
+TOOL_USED: matchms
+NUM PEAKS: 26
+265.2529    11366224.0
+266.2564    1420444.0
+279.6362    29849749.0
+280.6546    8848921.0
+288.6414    202172046.0
+378.2093    15309961.0
+379.1966    2902366.0
+522.3565    4089569222.0
+523.354     1201714423.0
+549.3267    63300808.0
+576.2749    7386007.0
+577.3074    2354251.0
+617.2778    2323470.0
+625.4543    4040374.0
+796.9808    13576738.0
+797.9841    6368973.0
+809.9883    12596682.0
+810.9916    6601055.0
+1043.7028   144351468.0
+1044.7068   83271854.0
+1045.706    27998321.0
+1046.7131   6505178.0
+1058.1594   20718345.0
+1059.1626   6608764.0
+1071.1639   15461047.0
+1072.1671   5096642.0
+
+NAME: C004
+IONMODE: Negative
+RETENTIONTIME: 473.48
+SPECTRUMTYPE: Centroid
+TOOL_USED: matchms
+NUM PEAKS: 24
+124.1405    6517662.0
+170.2437    1237313.0
+275.6336    28001849.0
+296.147     190395687.0
+482.3247    145772322.0
+483.3283    36245876.0
+496.34      12577588056.0
+497.3442    3337125302.0
+498.3462    532285213.0
+499.3493    68176083.0
+770.964     49250157.0
+771.9675    22666873.0
+783.9721    9839299.0
+784.9749    3622908.0
+949.6233    8009033.0
+950.6274    3674694.0
+991.6726    1420557258.0
+992.6749    763118028.0
+993.6787    239161906.0
+994.6801    53549573.0
+1017.6897   168186952.0
+1018.6656   120599518.0
+1019.6555   57647644.0
+1020.6591   12469103.0
+
+NAME: C005
+IONMODE: Negative
+RETENTIONTIME: 41.72
+SPECTRUMTYPE: Centroid
+TOOL_USED: matchms
+NUM PEAKS: 20
+218.1386    14009249.0
+337.0623    88672453.0
+338.0654    8770055.0
+353.0361    37061354.0
+359.0443    48435582.0
+360.0459    5025128.0
+375.018     29159485.0
+376.0216    2740193.0
+381.0261    13522755.0
+396.9999    10317665.0
+417.0027    13822994.0
+418.9966    4386311.0
+432.9764    9779399.0
+438.9851    11307111.0
+440.9796    3364168.0
+454.9592    9820452.0
+456.9603    3774845.0
+470.9263    3632486.0
+512.8989    4072570.0
+572.871     3485486.0
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/convert/metadata.csv
--- a/test-data/convert/metadata.csv Mon Jan 15 12:28:02 2024 +0000
+++ b/test-data/convert/metadata.csv Mon Feb 05 10:35:49 2024 +0000
[
b'@@ -1,171 +1,171 @@\n-ionization,inchi,authors,spectrumtype,inchikey,instrument_type,smiles,license,scannumber,comment,ionmode,compound_name,adduct,num_peaks,precursor_mz,peak_comments,instrument,formula,collision_energy,retention_time\r\n-ESI+,,"Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)",Centroid,YASYVMFAVPKPKE-SECBINFHSA-N,LC-ESI-Orbitrap,COP(=O)(N=C(O)C)SC,CC BY-NC,1161,,Positive,Acephate,[M+H]+,16,184.0194,,LC Orbitrap Fusion Tribrid MS,C4H10NO3PS,,1.232997\r\n-ESI+,,"Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)",Centroid,CVXBEEMKQHEXEN-UHFFFAOYSA-N,LC-ESI-Orbitrap,CN=C(Oc1cccc2c1cccc2)O,CC BY-NC,2257,,Positive,Carbaryl,[M+H]+,1,202.0863,"{145.06491: \'Theoretical m/z 145.064787, Mass diff 0 (0.85 ppm), SMILES OC1=CC=CC=2C=CC=CC12, Annotation [C10H8O+H]+, Rule of HR True\'}",LC Orbitrap Fusion Tribrid MS,C12H11NO2,,5.259445\r\n-ESI+,,"Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)",Centroid,VEENJGZXVHKXNB-UHFFFAOYSA-N,LC-ESI-Orbitrap,COP(=O)(OC(=CC(=O)N(C)C)C)OC,CC BY-NC,1516,,Positive,Dicrotophos,[M+H]+,5,238.0844,"{112.07591: \'Theoretical m/z 112.075687, Mass diff 0 (1.99 ppm), SMILES O=C(C=CC)N(C)C, Annotation [C6H11NO-H]+, Rule of HR True\', 127.01563: \'Theoretical m/z 127.01547, Mass diff 0 (1.26 ppm), SMILES O=P(O)(OC)OC, Annotation [C2H7O4P+H]+, Rule of HR True\', 193.02605: \'Theoretical m/z 193.026035, Mass diff 0 (0.08 ppm), SMILES O=CC=C(OP(=O)(OC)OC)C, Annotation [C6H11O5P-H]+, Rule of HR True\', 238.08437: \'Theoretical m/z 238.083891, Mass diff 0 (2.01 ppm), SMILES O=C(C=C(OP(=O)(OC)OC)C)N(C)C, Annotation [C8H16NO5P+H]+, Rule of HR True\'}",LC Orbitrap Fusion Tribrid MS,C8H16NO5P,,2.025499\r\n-ESI+,,"Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)",Centroid,MCWXGJITAZMZEV-UHFFFAOYSA-N,LC-ESI-Orbitrap,CN=C(CSP(=S)(OC)OC)O,CC BY-NC,1865,,Positive,Dimethoate,[M+H]+,8,230.0072,"{88.0219: \'Theoretical m/z 88.021549, Mass diff 0 (3.99 ppm), SMILES SCC=NC, Annotation [C3H7NS-H]+, Rule of HR True\', 124.98233: \'Theoretical m/z 124.982067, Mass diff 0 (2.11 ppm), SMILES S=P(OC)OC, Annotation [C2H7O2PS-H]+, Rule of HR True\', 142.99275: \'Theoretical m/z 142.993177, Mass diff 0 (0 ppm), Formula C2H8O3PS\', 156.95422: \'Theoretical m/z 156.954136, Mass diff 0 (0.54 ppm), SMILES S=P(S)(OC)OC, Annotation [C2H7O2PS2-H]+, Rule of HR True\', 170.97: \'Theoretical m/z 170.969791, Mass diff 0 (1.22 ppm), SMILES S=P(OC)(OC)SC, Annotation [C3H9O2PS2-H]+, Rule of HR True\', 197.98123: \'Theoretical m/z 197.980686, Mass diff 0.001 (2.75 ppm), SMILES S=P(OC)SCC(O)=NC, Annotation [C4H10NO2PS2-H]+, Rule of HR True\', 198.96501: \'Theoretical m/z 198.965248, Mass diff 0 (0 ppm), Formula C4H8O3PS2\', 230.00722: \'Theoretical m/z 230.006895, Mass diff 0 (1.41 ppm), SMILES S=P(OC)(OC)SCC(O)=NC, Annotation [C5H12NO3PS2+H]+, Rule of HR True\'}",LC Orbitrap Fusion Tribrid MS,C5H12NO3PS2,,2.866696\r\n-ESI+,,"Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)",Centroid,QNBTYORWCCMPQP-UHFFFAOYSA-N,LC-ESI-Orbitrap,COc1cc(ccc1OC)C(=CC(=O)N1CCOCC1)c1ccc(cc1)Cl,CC BY-NC,3852,,Positive,Dimethomorph,[M+H]+,22,388.1316,"{114.05532: \'Theoretical m/z 114.054958, Mass diff 0 (3.17 ppm), SMILES O=CN1CCOCC1, Annotation [C5H9NO2-H]+, Rule of HR True\', 125.01571: \'Theoretical m/z 125.015255, Mass diff 0 (3.64 ppm), SMILES ClC1=CC=C(C=C1)C, Annotation [C7H7Cl-H]+, Rule of HR True\', 138.99484: \'Theoretical m/z 138.995067, Mass diff 0 (0 ppm), Formula C7H4ClO\', 155.0705: \'Theoretical m/z 155.070819, Mass diff 0 (0 ppm), Formula C8H11O3\', 165.05519: \'Theoretical m/z 165.055169, Mass diff -0.001 (0 ppm), Formula C9H9O3\', 195.08057: \'Theoretical m/z 195.08099, Mass diff 0 (0 ppm), Formula C14H11O\', 215.0262: \'Theoretical m/z 215.026368, Mass diff 0 (0 ppm), Formula C13H8ClO\', 223.07544: \'Theoretical m/z 223.075837, Mass diff 0 (1.78 ppm), SMILES O=C(C=CC1=CC=C(Cl)C=C1)N(C)CC, Annotation [C12H14ClNO]+, Rule of HR False\', 227.02576: \'Theoretical m/z 227.026368, Mass diff 0 (0 ppm), Formula C'..b'ical m/z 118.041865, Mass diff 0 (0 ppm), Formula C8H6O\', 120.04462: \'Theoretical m/z 120.044391, Mass diff 0 (1.91 ppm), SMILES OC1=CC=CC(N=C)=C1, Annotation [C7H7NO-H]+, Rule of HR True\', 121.03984: \'Theoretical m/z 121.040188, Mass diff 0 (0 ppm), Formula C6H5N2O\', 122.06016: \'Theoretical m/z 122.060041, Mass diff 0 (0.97 ppm), SMILES OC1=CC=CC(N=C)=C1, Annotation [C7H7NO+H]+, Rule of HR True\', 124.07605: \'Theoretical m/z 124.075689, Mass diff 0 (2.91 ppm), SMILES O(C1=CC=CC(N)=C1)C, Annotation [C7H9NO+H]+, Rule of HR True\', 135.04427: \'Theoretical m/z 135.044604, Mass diff 0 (0 ppm), Formula C8H7O2\', 145.06488: \'Theoretical m/z 145.06534, Mass diff 0 (0 ppm), Formula C10H9O\', 165.1024: \'Theoretical m/z 165.102232, Mass diff 0 (1.02 ppm), SMILES OC1=CC=CC(N=CN(C)C)=C1, Annotation [C9H12N2O+H]+, Rule of HR True\'}",MYPKGPZHHQEODQ-UHFFFAOYSA-N,222.1239,15,"Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)",Formetanate_2,1.13043,C11H15N3O2,LC-ESI-Orbitrap,positive,1161,CN=C(Oc1cccc(c1)N=CN(C)C)O,LC Orbitrap Fusion Tribrid MS,Centroid,ESI+,[M+H]+,CC BY-NC\r\n+"{134.07283: \'Theoretical m/z 134.072623, Mass diff 0 (1.55 ppm), SMILES O(C=1C=C(C=C(C1)C)C)C, Annotation [C9H12O-2H]+, Rule of HR False\', 136.07611: \'Theoretical m/z 136.076239, Mass diff 0 (0 ppm), Formula C8H10NO\', 150.092: \'Theoretical m/z 150.091343, Mass diff 0.001 (4.38 ppm), SMILES OC1=CC=C(C(=C1)C)N(C)C, Annotation [C9H13NO-H]+, Rule of HR True\', 151.09932: \'Theoretical m/z 151.099168, Mass diff 0 (1.01 ppm), SMILES OC1=CC=C(C(=C1)C)N(C)C, Annotation [C9H13NO]+, Rule of HR False\', 166.12282: \'Theoretical m/z 166.122633, Mass diff 0 (1.13 ppm), SMILES OC=1C=C(C(=C(C1)C)N(C)C)C, Annotation [C10H15NO+H]+, Rule of HR True\'}",YNEVBPNZHBAYOA-UHFFFAOYSA-N,223.1443,5,"Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)",Mexacarbate,1.682191,C12H18N2O2,LC-ESI-Orbitrap,positive,1328,CN=C(Oc1cc(C)c(c(c1)C)N(C)C)O,LC Orbitrap Fusion Tribrid MS,Centroid,ESI+,[M+H]+,CC BY-NC\r\n+"{89.03881: \'Theoretical m/z 89.038575, Mass diff 0 (2.63 ppm), SMILES C=1C=CC(=CC1)C, Annotation [C7H8-3H]+, Rule of HR True\', 94.06543: \'Theoretical m/z 94.065123, Mass diff 0 (3.27 ppm), SMILES NC=1C=CC=CC1, Annotation [C6H7N+H]+, Rule of HR True\', 106.06545: \'Theoretical m/z 106.065123, Mass diff 0 (3.09 ppm), SMILES N(=C)C=1C=CC=CC1, Annotation [C7H7N+H]+, Rule of HR True\', 125.01532: \'Theoretical m/z 125.015255, Mass diff 0 (0.52 ppm), SMILES ClC1=CC=C(C=C1)C, Annotation [C7H7Cl-H]+, Rule of HR True\'}",OGYFATSSENRIKG-UHFFFAOYSA-N,329.1426,5,"Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)",Monceren,7.14553,C19H21N2OCl,LC-ESI-Orbitrap,positive,3999,Clc1ccc(cc1)CN(C(=Nc1ccccc1)O)C1CCCC1,LC Orbitrap Fusion Tribrid MS,Centroid,ESI+,[M+H]+,CC BY-NC\r\n+"{136.03947: \'Theoretical m/z 136.03931, Mass diff 0 (1.18 ppm), SMILES OC(O)=NC=1C=CC=CC1, Annotation [C7H7NO2-H]+, Rule of HR True\', 154.04993: \'Theoretical m/z 154.049864, Mass diff 0 (0.43 ppm), SMILES OC(O)=NC=1C=CC=C(O)C1, Annotation [C7H7NO3+H]+, Rule of HR True\', 182.08162: \'Theoretical m/z 182.081175, Mass diff 0 (2.45 ppm), SMILES OC(=NC=1C=CC=C(O)C1)OCC, Annotation [C9H11NO3+H]+, Rule of HR True\'}",WZJZMXBKUWKXTQ-UHFFFAOYSA-N,301.1192,3,"Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)",Desmedipham,6.430396,C16H16N2O4,LC-ESI-Orbitrap,positive,2271,CCOC(=Nc1cccc(c1)OC(=Nc1ccccc1)O)O,LC Orbitrap Fusion Tribrid MS,Centroid,ESI+,[M+H]+,CC BY-NC\r\n+"{136.03947: \'Theoretical m/z 136.03931, Mass diff 0 (1.18 ppm), SMILES OC(O)=NC=1C=CC=CC1, Annotation [C7H7NO2-H]+, Rule of HR True\', 168.06587: \'Theoretical m/z 168.065519, Mass diff 0 (2.09 ppm), SMILES OC(=NC=1C=CC=C(O)C1)OC, Annotation [C8H9NO3+H]+, Rule of HR True\'}",IDOWTHOLJBTAFI-UHFFFAOYSA-N,301.1185,2,"Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)",Phenmedipham,6.570995,C16H16N2O4,LC-ESI-Orbitrap,positive,2458,COC(=Nc1cccc(c1)OC(=Nc1cccc(c1)C)O)O,LC Orbitrap Fusion Tribrid MS,Centroid,ESI+,[M+H]+,CC BY-NC\r\n'
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/out_matchms_add_key.msp
--- a/test-data/out_matchms_add_key.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,199 +0,0 @@
-IONMODE: Negative
-SPECTRUMTYPE: Centroid
-COMPOUND_NAME: C001
-RETENTION_TIME: 38.74
-RETENTION_INDEX: -1
-TOOL_USED: matchms
-NUM PEAKS: 57
-138.9121    10186226.0
-148.9337    1008656.0
-175.0641    26780143.0
-186.1095    2675456.0
-196.8658    21390430.0
-198.8647    21688594.0
-200.8848    7742528.0
-206.9034    26130980.0
-216.9205    32607700.0
-234.0134    2550129.0
-254.8252    23747536.0
-256.8215    31377637.0
-258.8237    15532799.0
-266.8652    9805546.0
-268.8537    3090354.0
-306.9914    3169316.0
-312.7841    10051801.0
-316.7777    10734168.0
-322.8157    6317648.0
-324.9549    8619910.0
-334.849     4178412.0
-342.8093    3285552.0
-349.9455    2050695.0
-350.9875    6150799.0
-351.941     1965882.0
-366.8281    3253770.0
-370.7418    9765463.0
-372.7383    19374863.0
-382.8218    12815572.0
-384.8177    8311500.0
-392.7685    10913351.0
-413.2664    3965867.0
-426.7772    5431633.0
-428.7834    8554675.0
-434.7287    9943329.0
-436.8161    3705247.0
-440.7322    10603010.0
-442.7401    8271752.0
-450.7016    8762673.0
-460.7076    4528973.0
-462.7862    2123666.0
-484.7242    4273989.0
-486.7743    4886062.0
-488.6825    12267966.0
-492.744     7662344.0
-494.8953    7188793.0
-498.8794    6811405.0
-500.8484    6520691.0
-502.7832    3567833.0
-510.763     4989757.0
-518.7415    4243468.0
-546.6093    7177067.0
-550.6949    6104789.0
-566.5977    5171811.0
-612.6927    2005587.0
-676.6436    1982714.0
-800.4451    2792137.0
-
-IONMODE: Negative
-SPECTRUMTYPE: Centroid
-COMPOUND_NAME: C002
-RETENTION_TIME: 520.25
-RETENTION_INDEX: 1234.5
-TOOL_USED: matchms
-NUM PEAKS: 35
-131.1733    1971789.0
-267.2688    6103973.0
-279.0196    1946255.0
-289.6491    46498377.0
-301.1565    15185412.0
-309.1649    18045974.0
-310.1623    295359836.0
-311.1658    13124727.0
-312.0296    38757284.0
-330.6757    12666597.0
-525.375     1073323842.0
-526.3783    181668883.0
-527.3812    23642795.0
-551.3321    111616808.0
-552.3348    28340614.0
-553.3314    2609936.0
-562.3269    7538206.0
-578.2905    7578406.0
-619.3008    4742103.0
-624.296     11790213.0
-813.5403    25060147.0
-814.5336    5865975.0
-955.1171    2322927.0
-1047.7378   150394804.0
-1048.7399   90978863.0
-1049.7432   29946438.0
-1050.7453   6807767.0
-1069.7158   5074652.0
-1074.1979   3402288.0
-1075.1968   33352763.0
-1076.2004   10417953.0
-1101.6535   2023916.0
-1206.3127   3738816.0
-1216.8041   4439324.0
-1217.807    3565334.0
-
-IONMODE: Negative
-SPECTRUMTYPE: Centroid
-COMPOUND_NAME: C003
-RETENTION_TIME: 483.67
-TOOL_USED: matchms
-NUM PEAKS: 26
-265.2529    11366224.0
-266.2564    1420444.0
-279.6362    29849749.0
-280.6546    8848921.0
-288.6414    202172046.0
-378.2093    15309961.0
-379.1966    2902366.0
-522.3565    4089569222.0
-523.354     1201714423.0
-549.3267    63300808.0
-576.2749    7386007.0
-577.3074    2354251.0
-617.2778    2323470.0
-625.4543    4040374.0
-796.9808    13576738.0
-797.9841    6368973.0
-809.9883    12596682.0
-810.9916    6601055.0
-1043.7028   144351468.0
-1044.7068   83271854.0
-1045.706    27998321.0
-1046.7131   6505178.0
-1058.1594   20718345.0
-1059.1626   6608764.0
-1071.1639   15461047.0
-1072.1671   5096642.0
-
-IONMODE: Negative
-SPECTRUMTYPE: Centroid
-COMPOUND_NAME: C004
-RETENTION_TIME: 473.48
-TOOL_USED: matchms
-NUM PEAKS: 24
-124.1405    6517662.0
-170.2437    1237313.0
-275.6336    28001849.0
-296.147     190395687.0
-482.3247    145772322.0
-483.3283    36245876.0
-496.34      12577588056.0
-497.3442    3337125302.0
-498.3462    532285213.0
-499.3493    68176083.0
-770.964     49250157.0
-771.9675    22666873.0
-783.9721    9839299.0
-784.9749    3622908.0
-949.6233    8009033.0
-950.6274    3674694.0
-991.6726    1420557258.0
-992.6749    763118028.0
-993.6787    239161906.0
-994.6801    53549573.0
-1017.6897   168186952.0
-1018.6656   120599518.0
-1019.6555   57647644.0
-1020.6591   12469103.0
-
-IONMODE: Negative
-SPECTRUMTYPE: Centroid
-COMPOUND_NAME: C005
-RETENTION_TIME: 41.72
-TOOL_USED: matchms
-NUM PEAKS: 20
-218.1386    14009249.0
-337.0623    88672453.0
-338.0654    8770055.0
-353.0361    37061354.0
-359.0443    48435582.0
-360.0459    5025128.0
-375.018     29159485.0
-376.0216    2740193.0
-381.0261    13522755.0
-396.9999    10317665.0
-417.0027    13822994.0
-418.9966    4386311.0
-432.9764    9779399.0
-438.9851    11307111.0
-440.9796    3364168.0
-454.9592    9820452.0
-456.9603    3774845.0
-470.9263    3632486.0
-512.8989    4072570.0
-572.871     3485486.0
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/chunk-size/chunk_0.msp
--- a/test-data/split/chunk-size/chunk_0.msp Mon Jan 15 12:28:02 2024 +0000
+++ b/test-data/split/chunk-size/chunk_0.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -1,12 +1,18 @@
+NAME: 1-NITROPYRENE
 SYNONYM: 1-NITROPYRENE
+DB#: JP000001
 INCHIKEY: ALRLPDGCPYIVHP-UHFFFAOYSA-N
+MW: 247.063328528
 FORMULA: C16H9NO2
+PRECURSORMZ: 0
+ACCESSION: JP000001
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: [O-1][N+1](=O)c(c4)c(c1)c(c3c4)c(c2cc3)c(ccc2)c1
 INCHI: InChI=1S/C16H9NO2/c18-17(19)14-9-7-12-5-4-10-2-1-3-11-6-8-13(14)16(12)15(10)11/h1-9H
 SMILES_2: [H]C=1C([H])=C2C([H])=C([H])C3=C([H])C([H])=C(C=4C([H])=C([H])C(C1[H])=C2C34)N(=O)=O
+EXACT_MASS: 247.06333
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -15,11 +21,6 @@
 LAST_AUTO-CURATION: 1495210335755
 MOLECULAR_FORMULA: C16H9NO2
 TOTAL_EXACT_MASS: 247.063328528
-COMPOUND_NAME: 1-NITROPYRENE
-SPECTRUM_ID: JP000001
-NOMINAL_MASS: 247.063328528
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 247.06333
 NUM PEAKS: 75
 51.0        2.66
 55.0        8.0
@@ -97,15 +98,21 @@
 247.0       52.66
 248.0       10.16
 
+NAME: 2,4-DINITROPHENOL
 SYNONYM: 2,4-DINITROPHENOL
+DB#: JP000002
 INCHIKEY: UFBJCMHMOXMLKC-UHFFFAOYSA-N
+MW: 184.01202122799998
 FORMULA: C6H4N2O5
+PRECURSORMZ: 0
+ACCESSION: JP000002
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: [O-1][N+1](=O)c(c1)cc([N+1]([O-1])=O)c(O)c1
 INCHI: InChI=1S/C6H4N2O5/c9-6-2-1-4(7(10)11)3-5(6)8(12)13/h1-3,9H
 SMILES_2: [H]OC1=C([H])C([H])=C(C([H])=C1N(=O)=O)N(=O)=O
+EXACT_MASS: 184.01202
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -114,11 +121,6 @@
 LAST_AUTO-CURATION: 1495210335764
 MOLECULAR_FORMULA: C6H4N2O5
 TOTAL_EXACT_MASS: 184.01202122799998
-COMPOUND_NAME: 2,4-DINITROPHENOL
-SPECTRUM_ID: JP000002
-NOMINAL_MASS: 184.01202122799998
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 184.01202
 NUM PEAKS: 64
 51.0        27.22
 52.0        19.9
@@ -185,15 +187,21 @@
 185.0       8.17
 186.0       1.34
 
+NAME: 3,4-DICHLOROPHENOL
 SYNONYM: 3,4-DICHLOROPHENOL
+DB#: JP000003
 INCHIKEY: WDNBURPWRNALGP-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000003
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)cc(Cl)c(Cl)c1
 INCHI: InChI=1S/C6H4Cl2O/c7-5-2-1-4(9)3-6(5)8/h1-3,9H
 SMILES_2: [H]OC1=C([H])C([H])=C(Cl)C(Cl)=C1[H]
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -202,11 +210,6 @@
 LAST_AUTO-CURATION: 1495210335820
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 3,4-DICHLOROPHENOL
-SPECTRUM_ID: JP000003
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 36
 51.0        2.25
 53.0        6.4
@@ -245,15 +248,21 @@
 165.0       4.54
 166.0       9.78
 
+NAME: 2,5-DICHLOROPHENOL
 SYNONYM: 2,5-DICHLOROPHENOL
+DB#: JP000004
 INCHIKEY: RANCECPPZPIPNO-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000004
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)c(Cl)ccc(Cl)1
 INCHI: InChI=1S/C6H4Cl2O/c7-4-1-2-5(8)6(9)3-4/h1-3,9H
 SMILES_2: [H]OC1=C([H])C(Cl)=C([H])C([H])=C1Cl
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -262,11 +271,6 @@
 LAST_AUTO-CURATION: 1495210335825
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,5-DICHLOROPHENOL
-SPECTRUM_ID: JP000004
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 44
 51.0        5.05
 52.0        2.29
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/chunk-size/chunk_1.msp
--- a/test-data/split/chunk-size/chunk_1.msp Mon Jan 15 12:28:02 2024 +0000
+++ b/test-data/split/chunk-size/chunk_1.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -1,12 +1,18 @@
+NAME: 2,6-DICHLOROPHENOL
 SYNONYM: 2,6-DICHLOROPHENOL
+DB#: JP000005
 INCHIKEY: HOLHYSJJBXSLMV-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000005
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Clc(c1)c(O)c(Cl)cc1
 INCHI: InChI=1S/C6H4Cl2O/c7-4-2-1-3-5(8)6(4)9/h1-3,9H
 SMILES_2: [H]OC=1C(Cl)=C([H])C([H])=C([H])C1Cl
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -15,11 +21,6 @@
 LAST_AUTO-CURATION: 1495210335848
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,6-DICHLOROPHENOL
-SPECTRUM_ID: JP000005
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 33
 53.0        7.25
 60.0        3.0
@@ -55,15 +56,21 @@
 165.0       4.35
 166.0       9.91
 
+NAME: 2,3-DICHLOROPHENOL
 SYNONYM: 2,3-DICHLOROPHENOL
+DB#: JP000006
 INCHIKEY: UMPSXRYVXUPCOS-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000006
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)c(Cl)c(Cl)cc1
 INCHI: InChI=1S/C6H4Cl2O/c7-4-2-1-3-5(9)6(4)8/h1-3,9H
 SMILES_2: [H]OC=1C([H])=C([H])C([H])=C(Cl)C1Cl
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -72,11 +79,6 @@
 LAST_AUTO-CURATION: 1495210335870
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,3-DICHLOROPHENOL
-SPECTRUM_ID: JP000006
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 42
 51.0        4.43
 53.0        10.39
@@ -121,15 +123,21 @@
 165.0       2.9
 166.0       7.58
 
+NAME: 2,4-DICHLOROPHENOL
 SYNONYM: 2,4-DICHLOROPHENOL
+DB#: JP000007
 INCHIKEY: HFZWRUODUSTPEG-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000007
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)c(Cl)cc(Cl)c1
 INCHI: InChI=1S/C6H4Cl2O/c7-4-1-2-6(9)5(8)3-4/h1-3,9H
 SMILES_2: [H]OC1=C([H])C([H])=C(Cl)C([H])=C1Cl
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -138,11 +146,6 @@
 LAST_AUTO-CURATION: 1495210335864
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,4-DICHLOROPHENOL
-SPECTRUM_ID: JP000007
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 37
 51.0        3.07
 53.0        12.34
@@ -182,15 +185,21 @@
 165.0       5.54
 166.0       9.19
 
+NAME: 3,5-DICHLOROPHENOL
 SYNONYM: 3,5-DICHLOROPHENOL
+DB#: JP000008
 INCHIKEY: VPOMSPZBQMDLTM-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000008
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)cc(Cl)cc(Cl)1
 INCHI: InChI=1S/C6H4Cl2O/c7-4-1-5(8)3-6(9)2-4/h1-3,9H
 SMILES_2: [H]OC=1C([H])=C(Cl)C([H])=C(Cl)C1[H]
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -199,11 +208,6 @@
 LAST_AUTO-CURATION: 1495210336053
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 3,5-DICHLOROPHENOL
-SPECTRUM_ID: JP000008
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 32
 51.0        1.24
 53.0        4.19
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/chunk-size/chunk_2.msp
--- a/test-data/split/chunk-size/chunk_2.msp Mon Jan 15 12:28:02 2024 +0000
+++ b/test-data/split/chunk-size/chunk_2.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -1,12 +1,18 @@
+NAME: 2,4,5-TRICHLOROPHENOL
 SYNONYM: 2,4,5-TRICHLOROPHENOL
+DB#: JP000009
 INCHIKEY: LHJGJYXLEPZJPM-UHFFFAOYSA-N
+MW: 195.924947756
 FORMULA: C6H3Cl3O
+PRECURSORMZ: 0
+ACCESSION: JP000009
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)c(Cl)cc(Cl)c(Cl)1
 INCHI: InChI=1S/C6H3Cl3O/c7-3-1-5(9)6(10)2-4(3)8/h1-2,10H
 SMILES_2: [H]OC1=C([H])C(Cl)=C(Cl)C([H])=C1Cl
+EXACT_MASS: 195.92495
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -15,11 +21,6 @@
 LAST_AUTO-CURATION: 1495210336033
 MOLECULAR_FORMULA: C6H3Cl3O
 TOTAL_EXACT_MASS: 195.924947756
-COMPOUND_NAME: 2,4,5-TRICHLOROPHENOL
-SPECTRUM_ID: JP000009
-NOMINAL_MASS: 195.924947756
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 195.92495
 NUM PEAKS: 65
 51.0        2.58
 53.0        14.73
@@ -87,15 +88,21 @@
 201.0       2.08
 202.0       3.15
 
+NAME: 2,4,6-TRICHLOROPHENOL
 SYNONYM: 2,4,6-TRICHLOROPHENOL
+DB#: JP000010
 INCHIKEY: LINPIYWFGCPVIE-UHFFFAOYSA-N
+MW: 195.924947756
 FORMULA: C6H3Cl3O
+PRECURSORMZ: 0
+ACCESSION: JP000010
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Clc(c1)cc(Cl)c(O)c(Cl)1
 INCHI: InChI=1S/C6H3Cl3O/c7-3-1-4(8)6(10)5(9)2-3/h1-2,10H
 SMILES_2: [H]OC=1C(Cl)=C([H])C(Cl)=C([H])C1Cl
+EXACT_MASS: 195.92495
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -104,11 +111,6 @@
 LAST_AUTO-CURATION: 1495210336053
 MOLECULAR_FORMULA: C6H3Cl3O
 TOTAL_EXACT_MASS: 195.924947756
-COMPOUND_NAME: 2,4,6-TRICHLOROPHENOL
-SPECTRUM_ID: JP000010
-NOMINAL_MASS: 195.924947756
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 195.92495
 NUM PEAKS: 66
 53.0        14.63
 55.0        2.49
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/num-chunks/chunk_0.msp
--- a/test-data/split/num-chunks/chunk_0.msp Mon Jan 15 12:28:02 2024 +0000
+++ b/test-data/split/num-chunks/chunk_0.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -1,12 +1,18 @@
+NAME: 1-NITROPYRENE
 SYNONYM: 1-NITROPYRENE
+DB#: JP000001
 INCHIKEY: ALRLPDGCPYIVHP-UHFFFAOYSA-N
+MW: 247.063328528
 FORMULA: C16H9NO2
+PRECURSORMZ: 0
+ACCESSION: JP000001
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: [O-1][N+1](=O)c(c4)c(c1)c(c3c4)c(c2cc3)c(ccc2)c1
 INCHI: InChI=1S/C16H9NO2/c18-17(19)14-9-7-12-5-4-10-2-1-3-11-6-8-13(14)16(12)15(10)11/h1-9H
 SMILES_2: [H]C=1C([H])=C2C([H])=C([H])C3=C([H])C([H])=C(C=4C([H])=C([H])C(C1[H])=C2C34)N(=O)=O
+EXACT_MASS: 247.06333
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -15,11 +21,6 @@
 LAST_AUTO-CURATION: 1495210335755
 MOLECULAR_FORMULA: C16H9NO2
 TOTAL_EXACT_MASS: 247.063328528
-COMPOUND_NAME: 1-NITROPYRENE
-SPECTRUM_ID: JP000001
-NOMINAL_MASS: 247.063328528
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 247.06333
 NUM PEAKS: 75
 51.0        2.66
 55.0        8.0
@@ -97,15 +98,21 @@
 247.0       52.66
 248.0       10.16
 
+NAME: 3,4-DICHLOROPHENOL
 SYNONYM: 3,4-DICHLOROPHENOL
+DB#: JP000003
 INCHIKEY: WDNBURPWRNALGP-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000003
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)cc(Cl)c(Cl)c1
 INCHI: InChI=1S/C6H4Cl2O/c7-5-2-1-4(9)3-6(5)8/h1-3,9H
 SMILES_2: [H]OC1=C([H])C([H])=C(Cl)C(Cl)=C1[H]
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -114,11 +121,6 @@
 LAST_AUTO-CURATION: 1495210335820
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 3,4-DICHLOROPHENOL
-SPECTRUM_ID: JP000003
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 36
 51.0        2.25
 53.0        6.4
@@ -157,15 +159,21 @@
 165.0       4.54
 166.0       9.78
 
+NAME: 2,6-DICHLOROPHENOL
 SYNONYM: 2,6-DICHLOROPHENOL
+DB#: JP000005
 INCHIKEY: HOLHYSJJBXSLMV-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000005
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Clc(c1)c(O)c(Cl)cc1
 INCHI: InChI=1S/C6H4Cl2O/c7-4-2-1-3-5(8)6(4)9/h1-3,9H
 SMILES_2: [H]OC=1C(Cl)=C([H])C([H])=C([H])C1Cl
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -174,11 +182,6 @@
 LAST_AUTO-CURATION: 1495210335848
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,6-DICHLOROPHENOL
-SPECTRUM_ID: JP000005
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 33
 53.0        7.25
 60.0        3.0
@@ -214,15 +217,21 @@
 165.0       4.35
 166.0       9.91
 
+NAME: 2,4-DICHLOROPHENOL
 SYNONYM: 2,4-DICHLOROPHENOL
+DB#: JP000007
 INCHIKEY: HFZWRUODUSTPEG-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000007
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)c(Cl)cc(Cl)c1
 INCHI: InChI=1S/C6H4Cl2O/c7-4-1-2-6(9)5(8)3-4/h1-3,9H
 SMILES_2: [H]OC1=C([H])C([H])=C(Cl)C([H])=C1Cl
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -231,11 +240,6 @@
 LAST_AUTO-CURATION: 1495210335864
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,4-DICHLOROPHENOL
-SPECTRUM_ID: JP000007
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 37
 51.0        3.07
 53.0        12.34
@@ -275,15 +279,21 @@
 165.0       5.54
 166.0       9.19
 
+NAME: 2,4,5-TRICHLOROPHENOL
 SYNONYM: 2,4,5-TRICHLOROPHENOL
+DB#: JP000009
 INCHIKEY: LHJGJYXLEPZJPM-UHFFFAOYSA-N
+MW: 195.924947756
 FORMULA: C6H3Cl3O
+PRECURSORMZ: 0
+ACCESSION: JP000009
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)c(Cl)cc(Cl)c(Cl)1
 INCHI: InChI=1S/C6H3Cl3O/c7-3-1-5(9)6(10)2-4(3)8/h1-2,10H
 SMILES_2: [H]OC1=C([H])C(Cl)=C(Cl)C([H])=C1Cl
+EXACT_MASS: 195.92495
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -292,11 +302,6 @@
 LAST_AUTO-CURATION: 1495210336033
 MOLECULAR_FORMULA: C6H3Cl3O
 TOTAL_EXACT_MASS: 195.924947756
-COMPOUND_NAME: 2,4,5-TRICHLOROPHENOL
-SPECTRUM_ID: JP000009
-NOMINAL_MASS: 195.924947756
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 195.92495
 NUM PEAKS: 65
 51.0        2.58
 53.0        14.73
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/num-chunks/chunk_1.msp
--- a/test-data/split/num-chunks/chunk_1.msp Mon Jan 15 12:28:02 2024 +0000
+++ b/test-data/split/num-chunks/chunk_1.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -1,12 +1,18 @@
+NAME: 2,4-DINITROPHENOL
 SYNONYM: 2,4-DINITROPHENOL
+DB#: JP000002
 INCHIKEY: UFBJCMHMOXMLKC-UHFFFAOYSA-N
+MW: 184.01202122799998
 FORMULA: C6H4N2O5
+PRECURSORMZ: 0
+ACCESSION: JP000002
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: [O-1][N+1](=O)c(c1)cc([N+1]([O-1])=O)c(O)c1
 INCHI: InChI=1S/C6H4N2O5/c9-6-2-1-4(7(10)11)3-5(6)8(12)13/h1-3,9H
 SMILES_2: [H]OC1=C([H])C([H])=C(C([H])=C1N(=O)=O)N(=O)=O
+EXACT_MASS: 184.01202
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -15,11 +21,6 @@
 LAST_AUTO-CURATION: 1495210335764
 MOLECULAR_FORMULA: C6H4N2O5
 TOTAL_EXACT_MASS: 184.01202122799998
-COMPOUND_NAME: 2,4-DINITROPHENOL
-SPECTRUM_ID: JP000002
-NOMINAL_MASS: 184.01202122799998
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 184.01202
 NUM PEAKS: 64
 51.0        27.22
 52.0        19.9
@@ -86,15 +87,21 @@
 185.0       8.17
 186.0       1.34
 
+NAME: 2,5-DICHLOROPHENOL
 SYNONYM: 2,5-DICHLOROPHENOL
+DB#: JP000004
 INCHIKEY: RANCECPPZPIPNO-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000004
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)c(Cl)ccc(Cl)1
 INCHI: InChI=1S/C6H4Cl2O/c7-4-1-2-5(8)6(9)3-4/h1-3,9H
 SMILES_2: [H]OC1=C([H])C(Cl)=C([H])C([H])=C1Cl
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -103,11 +110,6 @@
 LAST_AUTO-CURATION: 1495210335825
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,5-DICHLOROPHENOL
-SPECTRUM_ID: JP000004
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 44
 51.0        5.05
 52.0        2.29
@@ -154,15 +156,21 @@
 165.0       4.62
 166.0       8.78
 
+NAME: 2,3-DICHLOROPHENOL
 SYNONYM: 2,3-DICHLOROPHENOL
+DB#: JP000006
 INCHIKEY: UMPSXRYVXUPCOS-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000006
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)c(Cl)c(Cl)cc1
 INCHI: InChI=1S/C6H4Cl2O/c7-4-2-1-3-5(9)6(4)8/h1-3,9H
 SMILES_2: [H]OC=1C([H])=C([H])C([H])=C(Cl)C1Cl
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -171,11 +179,6 @@
 LAST_AUTO-CURATION: 1495210335870
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,3-DICHLOROPHENOL
-SPECTRUM_ID: JP000006
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 42
 51.0        4.43
 53.0        10.39
@@ -220,15 +223,21 @@
 165.0       2.9
 166.0       7.58
 
+NAME: 3,5-DICHLOROPHENOL
 SYNONYM: 3,5-DICHLOROPHENOL
+DB#: JP000008
 INCHIKEY: VPOMSPZBQMDLTM-UHFFFAOYSA-N
+MW: 161.963920108
 FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000008
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Oc(c1)cc(Cl)cc(Cl)1
 INCHI: InChI=1S/C6H4Cl2O/c7-4-1-5(8)3-6(9)2-4/h1-3,9H
 SMILES_2: [H]OC=1C([H])=C(Cl)C([H])=C(Cl)C1[H]
+EXACT_MASS: 161.96392
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -237,11 +246,6 @@
 LAST_AUTO-CURATION: 1495210336053
 MOLECULAR_FORMULA: C6H4Cl2O
 TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 3,5-DICHLOROPHENOL
-SPECTRUM_ID: JP000008
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
 NUM PEAKS: 32
 51.0        1.24
 53.0        4.19
@@ -276,15 +280,21 @@
 165.0       4.43
 166.0       9.68
 
+NAME: 2,4,6-TRICHLOROPHENOL
 SYNONYM: 2,4,6-TRICHLOROPHENOL
+DB#: JP000010
 INCHIKEY: LINPIYWFGCPVIE-UHFFFAOYSA-N
+MW: 195.924947756
 FORMULA: C6H3Cl3O
+PRECURSORMZ: 0
+ACCESSION: JP000010
 AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
 LICENSE: CC BY-NC-SA
 INSTRUMENT: VARIAN MAT-44
 SMILES: Clc(c1)cc(Cl)c(O)c(Cl)1
 INCHI: InChI=1S/C6H3Cl3O/c7-3-1-4(8)6(10)5(9)2-3/h1-2,10H
 SMILES_2: [H]OC=1C(Cl)=C([H])C(Cl)=C([H])C1Cl
+EXACT_MASS: 195.92495
 INSTRUMENT_TYPE: EI-B
 MS_LEVEL: MS1
 IONIZATION_ENERGY: 70 eV
@@ -293,11 +303,6 @@
 LAST_AUTO-CURATION: 1495210336053
 MOLECULAR_FORMULA: C6H3Cl3O
 TOTAL_EXACT_MASS: 195.924947756
-COMPOUND_NAME: 2,4,6-TRICHLOROPHENOL
-SPECTRUM_ID: JP000010
-NOMINAL_MASS: 195.924947756
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 195.92495
 NUM PEAKS: 66
 53.0        14.63
 55.0        2.49
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/0.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split/one-per-file/0.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,100 @@
+NAME: 1-NITROPYRENE
+SYNONYM: 1-NITROPYRENE
+DB#: JP000001
+INCHIKEY: ALRLPDGCPYIVHP-UHFFFAOYSA-N
+MW: 247.063328528
+FORMULA: C16H9NO2
+PRECURSORMZ: 0
+ACCESSION: JP000001
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: [O-1][N+1](=O)c(c4)c(c1)c(c3c4)c(c2cc3)c(ccc2)c1
+INCHI: InChI=1S/C16H9NO2/c18-17(19)14-9-7-12-5-4-10-2-1-3-11-6-8-13(14)16(12)15(10)11/h1-9H
+SMILES_2: [H]C=1C([H])=C2C([H])=C([H])C3=C([H])C([H])=C(C=4C([H])=C([H])C(C1[H])=C2C34)N(=O)=O
+EXACT_MASS: 247.06333
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335755
+MOLECULAR_FORMULA: C16H9NO2
+TOTAL_EXACT_MASS: 247.063328528
+NUM PEAKS: 75
+51.0        2.66
+55.0        8.0
+57.0        7.33
+58.0        1.33
+59.0        1.33
+60.0        14.0
+61.0        1.33
+62.0        3.33
+63.0        3.33
+66.0        1.33
+68.0        8.66
+70.0        2.0
+72.0        5.33
+73.0        7.33
+74.0        3.33
+75.0        2.66
+76.0        2.0
+78.0        1.33
+80.0        4.0
+81.0        2.0
+82.0        1.33
+83.0        3.33
+86.0        12.66
+87.0        8.66
+92.0        2.0
+93.0        10.0
+94.0        6.0
+98.0        14.66
+99.0        83.33
+100.0       60.66
+104.0       4.0
+107.0       1.33
+108.0       1.33
+110.0       3.33
+112.0       1.33
+113.0       1.33
+115.0       1.33
+116.0       1.33
+120.0       1.33
+122.0       4.0
+123.0       2.66
+124.0       2.66
+125.0       2.0
+126.0       1.33
+134.0       1.33
+135.0       2.0
+137.0       1.33
+147.0       1.33
+149.0       2.0
+150.0       4.66
+151.0       3.33
+159.0       2.0
+162.0       2.0
+163.0       2.66
+173.0       2.0
+174.0       8.66
+175.0       4.66
+177.0       2.0
+187.0       5.33
+188.0       4.66
+189.0       56.66
+190.0       12.0
+191.0       16.66
+198.0       10.66
+199.0       9.33
+200.0       72.66
+201.0       99.99
+202.0       16.0
+203.0       1.33
+207.0       1.33
+214.0       1.33
+217.0       25.33
+218.0       5.33
+247.0       52.66
+248.0       10.16
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/1.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split/one-per-file/1.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,89 @@
+NAME: 2,4-DINITROPHENOL
+SYNONYM: 2,4-DINITROPHENOL
+DB#: JP000002
+INCHIKEY: UFBJCMHMOXMLKC-UHFFFAOYSA-N
+MW: 184.01202122799998
+FORMULA: C6H4N2O5
+PRECURSORMZ: 0
+ACCESSION: JP000002
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: [O-1][N+1](=O)c(c1)cc([N+1]([O-1])=O)c(O)c1
+INCHI: InChI=1S/C6H4N2O5/c9-6-2-1-4(7(10)11)3-5(6)8(12)13/h1-3,9H
+SMILES_2: [H]OC1=C([H])C([H])=C(C([H])=C1N(=O)=O)N(=O)=O
+EXACT_MASS: 184.01202
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335764
+MOLECULAR_FORMULA: C6H4N2O5
+TOTAL_EXACT_MASS: 184.01202122799998
+NUM PEAKS: 64
+51.0        27.22
+52.0        19.9
+53.0        61.8
+54.0        6.76
+55.0        13.95
+56.0        3.86
+57.0        11.52
+60.0        6.43
+61.0        13.38
+62.0        36.19
+63.0        61.37
+64.0        26.2
+65.0        6.74
+66.0        5.1
+67.0        7.43
+68.0        10.32
+69.0        29.16
+70.0        5.53
+71.0        6.11
+73.0        4.14
+74.0        3.92
+75.0        3.49
+76.0        4.33
+77.0        6.21
+78.0        5.1
+79.0        35.07
+80.0        9.85
+81.0        16.0
+82.0        5.37
+83.0        6.13
+84.0        2.96
+85.0        3.0
+90.0        12.01
+91.0        53.25
+92.0        28.32
+93.0        18.25
+94.0        3.51
+95.0        6.41
+96.0        5.43
+97.0        5.12
+98.0        2.43
+105.0       3.76
+106.0       6.35
+107.0       38.97
+108.0       7.11
+109.0       3.98
+111.0       2.63
+120.0       2.12
+121.0       4.45
+122.0       4.0
+123.0       3.14
+126.0       2.12
+136.0       2.77
+137.0       3.14
+138.0       3.55
+149.0       4.12
+153.0       4.02
+154.0       39.3
+155.0       3.16
+168.0       3.29
+183.0       3.26
+184.0       99.99
+185.0       8.17
+186.0       1.34
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/1NITROPYRENE.msp
--- a/test-data/split/one-per-file/1NITROPYRENE.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,99 +0,0 @@
-SYNONYM: 1-NITROPYRENE
-INCHIKEY: ALRLPDGCPYIVHP-UHFFFAOYSA-N
-FORMULA: C16H9NO2
-AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
-LICENSE: CC BY-NC-SA
-INSTRUMENT: VARIAN MAT-44
-SMILES: [O-1][N+1](=O)c(c4)c(c1)c(c3c4)c(c2cc3)c(ccc2)c1
-INCHI: InChI=1S/C16H9NO2/c18-17(19)14-9-7-12-5-4-10-2-1-3-11-6-8-13(14)16(12)15(10)11/h1-9H
-SMILES_2: [H]C=1C([H])=C2C([H])=C([H])C3=C([H])C([H])=C(C=4C([H])=C([H])C(C1[H])=C2C34)N(=O)=O
-INSTRUMENT_TYPE: EI-B
-MS_LEVEL: MS1
-IONIZATION_ENERGY: 70 eV
-ION_TYPE: [M]+*
-IONIZATION_MODE: positive
-LAST_AUTO-CURATION: 1495210335755
-MOLECULAR_FORMULA: C16H9NO2
-TOTAL_EXACT_MASS: 247.063328528
-COMPOUND_NAME: 1-NITROPYRENE
-SPECTRUM_ID: JP000001
-NOMINAL_MASS: 247.063328528
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 247.06333
-NUM PEAKS: 75
-51.0        2.66
-55.0        8.0
-57.0        7.33
-58.0        1.33
-59.0        1.33
-60.0        14.0
-61.0        1.33
-62.0        3.33
-63.0        3.33
-66.0        1.33
-68.0        8.66
-70.0        2.0
-72.0        5.33
-73.0        7.33
-74.0        3.33
-75.0        2.66
-76.0        2.0
-78.0        1.33
-80.0        4.0
-81.0        2.0
-82.0        1.33
-83.0        3.33
-86.0        12.66
-87.0        8.66
-92.0        2.0
-93.0        10.0
-94.0        6.0
-98.0        14.66
-99.0        83.33
-100.0       60.66
-104.0       4.0
-107.0       1.33
-108.0       1.33
-110.0       3.33
-112.0       1.33
-113.0       1.33
-115.0       1.33
-116.0       1.33
-120.0       1.33
-122.0       4.0
-123.0       2.66
-124.0       2.66
-125.0       2.0
-126.0       1.33
-134.0       1.33
-135.0       2.0
-137.0       1.33
-147.0       1.33
-149.0       2.0
-150.0       4.66
-151.0       3.33
-159.0       2.0
-162.0       2.0
-163.0       2.66
-173.0       2.0
-174.0       8.66
-175.0       4.66
-177.0       2.0
-187.0       5.33
-188.0       4.66
-189.0       56.66
-190.0       12.0
-191.0       16.66
-198.0       10.66
-199.0       9.33
-200.0       72.66
-201.0       99.99
-202.0       16.0
-203.0       1.33
-207.0       1.33
-214.0       1.33
-217.0       25.33
-218.0       5.33
-247.0       52.66
-248.0       10.16
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/2.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split/one-per-file/2.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,61 @@
+NAME: 3,4-DICHLOROPHENOL
+SYNONYM: 3,4-DICHLOROPHENOL
+DB#: JP000003
+INCHIKEY: WDNBURPWRNALGP-UHFFFAOYSA-N
+MW: 161.963920108
+FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000003
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Oc(c1)cc(Cl)c(Cl)c1
+INCHI: InChI=1S/C6H4Cl2O/c7-5-2-1-4(9)3-6(5)8/h1-3,9H
+SMILES_2: [H]OC1=C([H])C([H])=C(Cl)C(Cl)=C1[H]
+EXACT_MASS: 161.96392
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335820
+MOLECULAR_FORMULA: C6H4Cl2O
+TOTAL_EXACT_MASS: 161.963920108
+NUM PEAKS: 36
+51.0        2.25
+53.0        6.4
+60.0        4.13
+61.0        9.78
+62.0        20.36
+63.0        32.41
+64.0        5.58
+71.0        2.16
+72.0        8.31
+73.0        13.57
+74.0        6.23
+75.0        5.23
+81.0        8.28
+82.0        5.27
+83.0        2.81
+91.0        2.06
+97.0        6.25
+98.0        25.55
+99.0        33.74
+100.0       9.84
+101.0       12.32
+107.0       2.31
+109.0       2.08
+126.0       7.67
+127.0       3.67
+128.0       2.81
+133.0       5.09
+134.0       7.44
+135.0       3.61
+136.0       4.75
+161.0       3.6
+162.0       99.99
+163.0       8.7
+164.0       62.28
+165.0       4.54
+166.0       9.78
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/23DICHLOROPHENOL.msp
--- a/test-data/split/one-per-file/23DICHLOROPHENOL.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,66 +0,0 @@
-SYNONYM: 2,3-DICHLOROPHENOL
-INCHIKEY: UMPSXRYVXUPCOS-UHFFFAOYSA-N
-FORMULA: C6H4Cl2O
-AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
-LICENSE: CC BY-NC-SA
-INSTRUMENT: VARIAN MAT-44
-SMILES: Oc(c1)c(Cl)c(Cl)cc1
-INCHI: InChI=1S/C6H4Cl2O/c7-4-2-1-3-5(9)6(4)8/h1-3,9H
-SMILES_2: [H]OC=1C([H])=C([H])C([H])=C(Cl)C1Cl
-INSTRUMENT_TYPE: EI-B
-MS_LEVEL: MS1
-IONIZATION_ENERGY: 70 eV
-ION_TYPE: [M]+*
-IONIZATION_MODE: positive
-LAST_AUTO-CURATION: 1495210335870
-MOLECULAR_FORMULA: C6H4Cl2O
-TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,3-DICHLOROPHENOL
-SPECTRUM_ID: JP000006
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
-NUM PEAKS: 42
-51.0        4.43
-53.0        10.39
-60.0        9.21
-61.0        24.93
-62.0        43.19
-63.0        99.99
-64.0        12.57
-65.0        4.81
-66.0        3.39
-71.0        3.67
-72.0        15.34
-73.0        25.07
-74.0        11.84
-75.0        8.79
-81.0        4.78
-82.0        3.25
-83.0        2.63
-84.0        3.87
-85.0        2.49
-87.0        5.09
-89.0        2.21
-91.0        6.02
-96.0        3.11
-97.0        12.05
-98.0        35.88
-99.0        22.09
-100.0       13.5
-101.0       6.26
-107.0       3.33
-109.0       2.73
-125.0       3.11
-126.0       59.16
-127.0       5.61
-128.0       19.32
-133.0       5.33
-135.0       2.84
-161.0       2.52
-162.0       68.96
-163.0       6.51
-164.0       51.64
-165.0       2.9
-166.0       7.58
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/245TRICHLOROPHENOL.msp
--- a/test-data/split/one-per-file/245TRICHLOROPHENOL.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,89 +0,0 @@
-SYNONYM: 2,4,5-TRICHLOROPHENOL
-INCHIKEY: LHJGJYXLEPZJPM-UHFFFAOYSA-N
-FORMULA: C6H3Cl3O
-AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
-LICENSE: CC BY-NC-SA
-INSTRUMENT: VARIAN MAT-44
-SMILES: Oc(c1)c(Cl)cc(Cl)c(Cl)1
-INCHI: InChI=1S/C6H3Cl3O/c7-3-1-5(9)6(10)2-4(3)8/h1-2,10H
-SMILES_2: [H]OC1=C([H])C(Cl)=C(Cl)C([H])=C1Cl
-INSTRUMENT_TYPE: EI-B
-MS_LEVEL: MS1
-IONIZATION_ENERGY: 70 eV
-ION_TYPE: [M]+*
-IONIZATION_MODE: positive
-LAST_AUTO-CURATION: 1495210336033
-MOLECULAR_FORMULA: C6H3Cl3O
-TOTAL_EXACT_MASS: 195.924947756
-COMPOUND_NAME: 2,4,5-TRICHLOROPHENOL
-SPECTRUM_ID: JP000009
-NOMINAL_MASS: 195.924947756
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 195.92495
-NUM PEAKS: 65
-51.0        2.58
-53.0        14.73
-59.0        2.03
-60.0        12.75
-61.0        30.62
-62.0        36.79
-63.0        19.11
-64.0        2.15
-65.0        5.23
-66.0        13.42
-67.0        7.46
-69.0        2.46
-71.0        6.55
-72.0        13.85
-73.0        16.02
-74.0        7.55
-75.0        4.47
-79.0        2.34
-80.0        8.06
-81.0        5.21
-82.0        3.22
-83.0        7.1
-84.0        6.05
-85.0        6.38
-86.0        2.53
-87.0        3.44
-89.0        1.93
-95.0        3.8
-96.0        33.63
-97.0        67.27
-98.0        25.02
-99.0        31.7
-100.0       5.86
-106.0       2.03
-107.0       8.66
-108.0       3.94
-109.0       6.55
-131.0       12.51
-132.0       48.06
-133.0       32.0
-134.0       33.42
-135.0       18.37
-136.0       6.55
-137.0       2.96
-149.0       6.48
-151.0       3.39
-160.0       10.69
-161.0       4.76
-162.0       10.76
-163.0       3.58
-164.0       3.61
-167.0       4.06
-169.0       3.89
-177.0       4.76
-179.0       2.94
-192.0       6.69
-194.0       4.64
-195.0       6.79
-196.0       99.99
-197.0       11.45
-198.0       92.58
-199.0       7.82
-200.0       29.54
-201.0       2.08
-202.0       3.15
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/246TRICHLOROPHENOL.msp
--- a/test-data/split/one-per-file/246TRICHLOROPHENOL.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,90 +0,0 @@
-SYNONYM: 2,4,6-TRICHLOROPHENOL
-INCHIKEY: LINPIYWFGCPVIE-UHFFFAOYSA-N
-FORMULA: C6H3Cl3O
-AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
-LICENSE: CC BY-NC-SA
-INSTRUMENT: VARIAN MAT-44
-SMILES: Clc(c1)cc(Cl)c(O)c(Cl)1
-INCHI: InChI=1S/C6H3Cl3O/c7-3-1-4(8)6(10)5(9)2-3/h1-2,10H
-SMILES_2: [H]OC=1C(Cl)=C([H])C(Cl)=C([H])C1Cl
-INSTRUMENT_TYPE: EI-B
-MS_LEVEL: MS1
-IONIZATION_ENERGY: 70 eV
-ION_TYPE: [M]+*
-IONIZATION_MODE: positive
-LAST_AUTO-CURATION: 1495210336053
-MOLECULAR_FORMULA: C6H3Cl3O
-TOTAL_EXACT_MASS: 195.924947756
-COMPOUND_NAME: 2,4,6-TRICHLOROPHENOL
-SPECTRUM_ID: JP000010
-NOMINAL_MASS: 195.924947756
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 195.92495
-NUM PEAKS: 66
-53.0        14.63
-55.0        2.49
-57.0        2.2
-60.0        12.21
-61.0        32.06
-62.0        42.22
-63.0        36.9
-64.0        4.32
-65.0        8.43
-66.0        23.0
-67.0        12.65
-68.0        2.71
-71.0        6.78
-72.0        13.68
-73.0        17.64
-74.0        8.84
-75.0        5.57
-80.0        9.94
-81.0        8.84
-82.0        4.21
-83.0        8.62
-84.0        6.16
-85.0        5.83
-87.0        3.92
-89.0        2.2
-90.0        2.89
-91.0        2.09
-95.0        4.84
-96.0        34.11
-97.0        70.76
-98.0        39.72
-99.0        38.18
-100.0       10.63
-101.0       2.64
-106.0       2.45
-107.0       9.09
-108.0       3.77
-109.0       7.22
-111.0       2.23
-125.0       3.44
-126.0       8.91
-127.0       2.05
-128.0       3.52
-131.0       18.48
-132.0       57.96
-133.0       22.12
-134.0       40.71
-135.0       10.45
-136.0       7.81
-160.0       31.84
-161.0       5.2
-162.0       50.47
-163.0       5.2
-164.0       22.81
-166.0       5.57
-167.0       4.1
-168.0       2.56
-169.0       3.63
-195.0       3.59
-196.0       99.99
-197.0       9.68
-198.0       91.34
-199.0       7.07
-200.0       28.42
-201.0       2.09
-202.0       3.04
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/24DICHLOROPHENOL.msp
--- a/test-data/split/one-per-file/24DICHLOROPHENOL.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,61 +0,0 @@
-SYNONYM: 2,4-DICHLOROPHENOL
-INCHIKEY: HFZWRUODUSTPEG-UHFFFAOYSA-N
-FORMULA: C6H4Cl2O
-AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
-LICENSE: CC BY-NC-SA
-INSTRUMENT: VARIAN MAT-44
-SMILES: Oc(c1)c(Cl)cc(Cl)c1
-INCHI: InChI=1S/C6H4Cl2O/c7-4-1-2-6(9)5(8)3-4/h1-3,9H
-SMILES_2: [H]OC1=C([H])C([H])=C(Cl)C([H])=C1Cl
-INSTRUMENT_TYPE: EI-B
-MS_LEVEL: MS1
-IONIZATION_ENERGY: 70 eV
-ION_TYPE: [M]+*
-IONIZATION_MODE: positive
-LAST_AUTO-CURATION: 1495210335864
-MOLECULAR_FORMULA: C6H4Cl2O
-TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,4-DICHLOROPHENOL
-SPECTRUM_ID: JP000007
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
-NUM PEAKS: 37
-51.0        3.07
-53.0        12.34
-60.0        6.21
-61.0        19.31
-62.0        35.08
-63.0        99.99
-64.0        10.24
-66.0        2.25
-71.0        3.05
-72.0        10.59
-73.0        19.52
-74.0        8.59
-75.0        6.44
-81.0        6.82
-82.0        4.45
-83.0        2.77
-84.0        2.03
-91.0        2.34
-96.0        3.78
-97.0        31.79
-98.0        38.03
-99.0        21.59
-100.0       13.06
-101.0       4.67
-125.0       4.82
-126.0       20.32
-127.0       3.76
-128.0       7.38
-133.0       4.02
-134.0       2.72
-135.0       2.64
-161.0       19.22
-162.0       94.19
-163.0       15.34
-164.0       55.32
-165.0       5.54
-166.0       9.19
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/24DINITROPHENOL.msp
--- a/test-data/split/one-per-file/24DINITROPHENOL.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,88 +0,0 @@
-SYNONYM: 2,4-DINITROPHENOL
-INCHIKEY: UFBJCMHMOXMLKC-UHFFFAOYSA-N
-FORMULA: C6H4N2O5
-AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
-LICENSE: CC BY-NC-SA
-INSTRUMENT: VARIAN MAT-44
-SMILES: [O-1][N+1](=O)c(c1)cc([N+1]([O-1])=O)c(O)c1
-INCHI: InChI=1S/C6H4N2O5/c9-6-2-1-4(7(10)11)3-5(6)8(12)13/h1-3,9H
-SMILES_2: [H]OC1=C([H])C([H])=C(C([H])=C1N(=O)=O)N(=O)=O
-INSTRUMENT_TYPE: EI-B
-MS_LEVEL: MS1
-IONIZATION_ENERGY: 70 eV
-ION_TYPE: [M]+*
-IONIZATION_MODE: positive
-LAST_AUTO-CURATION: 1495210335764
-MOLECULAR_FORMULA: C6H4N2O5
-TOTAL_EXACT_MASS: 184.01202122799998
-COMPOUND_NAME: 2,4-DINITROPHENOL
-SPECTRUM_ID: JP000002
-NOMINAL_MASS: 184.01202122799998
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 184.01202
-NUM PEAKS: 64
-51.0        27.22
-52.0        19.9
-53.0        61.8
-54.0        6.76
-55.0        13.95
-56.0        3.86
-57.0        11.52
-60.0        6.43
-61.0        13.38
-62.0        36.19
-63.0        61.37
-64.0        26.2
-65.0        6.74
-66.0        5.1
-67.0        7.43
-68.0        10.32
-69.0        29.16
-70.0        5.53
-71.0        6.11
-73.0        4.14
-74.0        3.92
-75.0        3.49
-76.0        4.33
-77.0        6.21
-78.0        5.1
-79.0        35.07
-80.0        9.85
-81.0        16.0
-82.0        5.37
-83.0        6.13
-84.0        2.96
-85.0        3.0
-90.0        12.01
-91.0        53.25
-92.0        28.32
-93.0        18.25
-94.0        3.51
-95.0        6.41
-96.0        5.43
-97.0        5.12
-98.0        2.43
-105.0       3.76
-106.0       6.35
-107.0       38.97
-108.0       7.11
-109.0       3.98
-111.0       2.63
-120.0       2.12
-121.0       4.45
-122.0       4.0
-123.0       3.14
-126.0       2.12
-136.0       2.77
-137.0       3.14
-138.0       3.55
-149.0       4.12
-153.0       4.02
-154.0       39.3
-155.0       3.16
-168.0       3.29
-183.0       3.26
-184.0       99.99
-185.0       8.17
-186.0       1.34
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/25DICHLOROPHENOL.msp
--- a/test-data/split/one-per-file/25DICHLOROPHENOL.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,68 +0,0 @@
-SYNONYM: 2,5-DICHLOROPHENOL
-INCHIKEY: RANCECPPZPIPNO-UHFFFAOYSA-N
-FORMULA: C6H4Cl2O
-AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
-LICENSE: CC BY-NC-SA
-INSTRUMENT: VARIAN MAT-44
-SMILES: Oc(c1)c(Cl)ccc(Cl)1
-INCHI: InChI=1S/C6H4Cl2O/c7-4-1-2-5(8)6(9)3-4/h1-3,9H
-SMILES_2: [H]OC1=C([H])C(Cl)=C([H])C([H])=C1Cl
-INSTRUMENT_TYPE: EI-B
-MS_LEVEL: MS1
-IONIZATION_ENERGY: 70 eV
-ION_TYPE: [M]+*
-IONIZATION_MODE: positive
-LAST_AUTO-CURATION: 1495210335825
-MOLECULAR_FORMULA: C6H4Cl2O
-TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,5-DICHLOROPHENOL
-SPECTRUM_ID: JP000004
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
-NUM PEAKS: 44
-51.0        5.05
-52.0        2.29
-53.0        22.87
-59.0        3.69
-60.0        16.58
-61.0        33.26
-62.0        62.1
-63.0        99.99
-64.0        11.61
-65.0        2.73
-66.0        4.11
-71.0        2.98
-72.0        12.03
-73.0        32.28
-74.0        12.69
-75.0        11.42
-81.0        6.65
-82.0        4.64
-83.0        3.82
-84.0        3.02
-85.0        2.81
-87.0        2.86
-89.0        2.17
-90.0        2.05
-91.0        6.28
-96.0        3.57
-97.0        15.64
-98.0        39.0
-99.0        33.72
-100.0       13.84
-101.0       10.87
-126.0       9.01
-127.0       3.11
-128.0       3.25
-133.0       6.28
-134.0       4.28
-135.0       4.21
-136.0       2.59
-161.0       11.74
-162.0       89.04
-163.0       12.37
-164.0       52.89
-165.0       4.62
-166.0       8.78
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/26DICHLOROPHENOL.msp
--- a/test-data/split/one-per-file/26DICHLOROPHENOL.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,57 +0,0 @@
-SYNONYM: 2,6-DICHLOROPHENOL
-INCHIKEY: HOLHYSJJBXSLMV-UHFFFAOYSA-N
-FORMULA: C6H4Cl2O
-AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
-LICENSE: CC BY-NC-SA
-INSTRUMENT: VARIAN MAT-44
-SMILES: Clc(c1)c(O)c(Cl)cc1
-INCHI: InChI=1S/C6H4Cl2O/c7-4-2-1-3-5(8)6(4)9/h1-3,9H
-SMILES_2: [H]OC=1C(Cl)=C([H])C([H])=C([H])C1Cl
-INSTRUMENT_TYPE: EI-B
-MS_LEVEL: MS1
-IONIZATION_ENERGY: 70 eV
-ION_TYPE: [M]+*
-IONIZATION_MODE: positive
-LAST_AUTO-CURATION: 1495210335848
-MOLECULAR_FORMULA: C6H4Cl2O
-TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 2,6-DICHLOROPHENOL
-SPECTRUM_ID: JP000005
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
-NUM PEAKS: 33
-53.0        7.25
-60.0        3.0
-61.0        8.88
-62.0        17.84
-63.0        70.92
-64.0        8.02
-65.0        2.01
-72.0        5.48
-73.0        12.35
-74.0        4.63
-75.0        4.81
-81.0        6.73
-82.0        4.37
-83.0        2.09
-91.0        3.83
-97.0        7.27
-98.0        34.04
-99.0        15.04
-100.0       13.17
-101.0       4.37
-107.0       2.61
-125.0       2.01
-126.0       33.42
-127.0       3.34
-128.0       11.41
-133.0       3.34
-135.0       2.17
-161.0       2.35
-162.0       99.99
-163.0       8.23
-164.0       63.43
-165.0       4.35
-166.0       9.91
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/3.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split/one-per-file/3.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,69 @@
+NAME: 2,5-DICHLOROPHENOL
+SYNONYM: 2,5-DICHLOROPHENOL
+DB#: JP000004
+INCHIKEY: RANCECPPZPIPNO-UHFFFAOYSA-N
+MW: 161.963920108
+FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000004
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Oc(c1)c(Cl)ccc(Cl)1
+INCHI: InChI=1S/C6H4Cl2O/c7-4-1-2-5(8)6(9)3-4/h1-3,9H
+SMILES_2: [H]OC1=C([H])C(Cl)=C([H])C([H])=C1Cl
+EXACT_MASS: 161.96392
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335825
+MOLECULAR_FORMULA: C6H4Cl2O
+TOTAL_EXACT_MASS: 161.963920108
+NUM PEAKS: 44
+51.0        5.05
+52.0        2.29
+53.0        22.87
+59.0        3.69
+60.0        16.58
+61.0        33.26
+62.0        62.1
+63.0        99.99
+64.0        11.61
+65.0        2.73
+66.0        4.11
+71.0        2.98
+72.0        12.03
+73.0        32.28
+74.0        12.69
+75.0        11.42
+81.0        6.65
+82.0        4.64
+83.0        3.82
+84.0        3.02
+85.0        2.81
+87.0        2.86
+89.0        2.17
+90.0        2.05
+91.0        6.28
+96.0        3.57
+97.0        15.64
+98.0        39.0
+99.0        33.72
+100.0       13.84
+101.0       10.87
+126.0       9.01
+127.0       3.11
+128.0       3.25
+133.0       6.28
+134.0       4.28
+135.0       4.21
+136.0       2.59
+161.0       11.74
+162.0       89.04
+163.0       12.37
+164.0       52.89
+165.0       4.62
+166.0       8.78
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/34DICHLOROPHENOL.msp
--- a/test-data/split/one-per-file/34DICHLOROPHENOL.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,60 +0,0 @@
-SYNONYM: 3,4-DICHLOROPHENOL
-INCHIKEY: WDNBURPWRNALGP-UHFFFAOYSA-N
-FORMULA: C6H4Cl2O
-AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
-LICENSE: CC BY-NC-SA
-INSTRUMENT: VARIAN MAT-44
-SMILES: Oc(c1)cc(Cl)c(Cl)c1
-INCHI: InChI=1S/C6H4Cl2O/c7-5-2-1-4(9)3-6(5)8/h1-3,9H
-SMILES_2: [H]OC1=C([H])C([H])=C(Cl)C(Cl)=C1[H]
-INSTRUMENT_TYPE: EI-B
-MS_LEVEL: MS1
-IONIZATION_ENERGY: 70 eV
-ION_TYPE: [M]+*
-IONIZATION_MODE: positive
-LAST_AUTO-CURATION: 1495210335820
-MOLECULAR_FORMULA: C6H4Cl2O
-TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 3,4-DICHLOROPHENOL
-SPECTRUM_ID: JP000003
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
-NUM PEAKS: 36
-51.0        2.25
-53.0        6.4
-60.0        4.13
-61.0        9.78
-62.0        20.36
-63.0        32.41
-64.0        5.58
-71.0        2.16
-72.0        8.31
-73.0        13.57
-74.0        6.23
-75.0        5.23
-81.0        8.28
-82.0        5.27
-83.0        2.81
-91.0        2.06
-97.0        6.25
-98.0        25.55
-99.0        33.74
-100.0       9.84
-101.0       12.32
-107.0       2.31
-109.0       2.08
-126.0       7.67
-127.0       3.67
-128.0       2.81
-133.0       5.09
-134.0       7.44
-135.0       3.61
-136.0       4.75
-161.0       3.6
-162.0       99.99
-163.0       8.7
-164.0       62.28
-165.0       4.54
-166.0       9.78
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/35DICHLOROPHENOL.msp
--- a/test-data/split/one-per-file/35DICHLOROPHENOL.msp Mon Jan 15 12:28:02 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,56 +0,0 @@
-SYNONYM: 3,5-DICHLOROPHENOL
-INCHIKEY: VPOMSPZBQMDLTM-UHFFFAOYSA-N
-FORMULA: C6H4Cl2O
-AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
-LICENSE: CC BY-NC-SA
-INSTRUMENT: VARIAN MAT-44
-SMILES: Oc(c1)cc(Cl)cc(Cl)1
-INCHI: InChI=1S/C6H4Cl2O/c7-4-1-5(8)3-6(9)2-4/h1-3,9H
-SMILES_2: [H]OC=1C([H])=C(Cl)C([H])=C(Cl)C1[H]
-INSTRUMENT_TYPE: EI-B
-MS_LEVEL: MS1
-IONIZATION_ENERGY: 70 eV
-ION_TYPE: [M]+*
-IONIZATION_MODE: positive
-LAST_AUTO-CURATION: 1495210336053
-MOLECULAR_FORMULA: C6H4Cl2O
-TOTAL_EXACT_MASS: 161.963920108
-COMPOUND_NAME: 3,5-DICHLOROPHENOL
-SPECTRUM_ID: JP000008
-NOMINAL_MASS: 161.963920108
-PRECURSOR_MZ: 0.0
-PARENT_MASS: 161.96392
-NUM PEAKS: 32
-51.0        1.24
-53.0        4.19
-60.0        3.61
-61.0        8.59
-62.0        16.38
-63.0        31.53
-64.0        4.94
-72.0        4.88
-73.0        10.01
-74.0        4.53
-75.0        3.92
-81.0        6.85
-82.0        4.37
-83.0        2.46
-97.0        7.3
-98.0        27.86
-99.0        28.43
-100.0       10.31
-101.0       9.88
-126.0       8.1
-127.0       4.51
-128.0       3.3
-133.0       4.08
-134.0       6.58
-135.0       2.96
-136.0       4.15
-161.0       3.31
-162.0       99.99
-163.0       8.57
-164.0       60.06
-165.0       4.43
-166.0       9.68
-
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/4.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split/one-per-file/4.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,58 @@
+NAME: 2,6-DICHLOROPHENOL
+SYNONYM: 2,6-DICHLOROPHENOL
+DB#: JP000005
+INCHIKEY: HOLHYSJJBXSLMV-UHFFFAOYSA-N
+MW: 161.963920108
+FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000005
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Clc(c1)c(O)c(Cl)cc1
+INCHI: InChI=1S/C6H4Cl2O/c7-4-2-1-3-5(8)6(4)9/h1-3,9H
+SMILES_2: [H]OC=1C(Cl)=C([H])C([H])=C([H])C1Cl
+EXACT_MASS: 161.96392
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335848
+MOLECULAR_FORMULA: C6H4Cl2O
+TOTAL_EXACT_MASS: 161.963920108
+NUM PEAKS: 33
+53.0        7.25
+60.0        3.0
+61.0        8.88
+62.0        17.84
+63.0        70.92
+64.0        8.02
+65.0        2.01
+72.0        5.48
+73.0        12.35
+74.0        4.63
+75.0        4.81
+81.0        6.73
+82.0        4.37
+83.0        2.09
+91.0        3.83
+97.0        7.27
+98.0        34.04
+99.0        15.04
+100.0       13.17
+101.0       4.37
+107.0       2.61
+125.0       2.01
+126.0       33.42
+127.0       3.34
+128.0       11.41
+133.0       3.34
+135.0       2.17
+161.0       2.35
+162.0       99.99
+163.0       8.23
+164.0       63.43
+165.0       4.35
+166.0       9.91
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/5.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split/one-per-file/5.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,67 @@
+NAME: 2,3-DICHLOROPHENOL
+SYNONYM: 2,3-DICHLOROPHENOL
+DB#: JP000006
+INCHIKEY: UMPSXRYVXUPCOS-UHFFFAOYSA-N
+MW: 161.963920108
+FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000006
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Oc(c1)c(Cl)c(Cl)cc1
+INCHI: InChI=1S/C6H4Cl2O/c7-4-2-1-3-5(9)6(4)8/h1-3,9H
+SMILES_2: [H]OC=1C([H])=C([H])C([H])=C(Cl)C1Cl
+EXACT_MASS: 161.96392
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335870
+MOLECULAR_FORMULA: C6H4Cl2O
+TOTAL_EXACT_MASS: 161.963920108
+NUM PEAKS: 42
+51.0        4.43
+53.0        10.39
+60.0        9.21
+61.0        24.93
+62.0        43.19
+63.0        99.99
+64.0        12.57
+65.0        4.81
+66.0        3.39
+71.0        3.67
+72.0        15.34
+73.0        25.07
+74.0        11.84
+75.0        8.79
+81.0        4.78
+82.0        3.25
+83.0        2.63
+84.0        3.87
+85.0        2.49
+87.0        5.09
+89.0        2.21
+91.0        6.02
+96.0        3.11
+97.0        12.05
+98.0        35.88
+99.0        22.09
+100.0       13.5
+101.0       6.26
+107.0       3.33
+109.0       2.73
+125.0       3.11
+126.0       59.16
+127.0       5.61
+128.0       19.32
+133.0       5.33
+135.0       2.84
+161.0       2.52
+162.0       68.96
+163.0       6.51
+164.0       51.64
+165.0       2.9
+166.0       7.58
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/6.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split/one-per-file/6.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,62 @@
+NAME: 2,4-DICHLOROPHENOL
+SYNONYM: 2,4-DICHLOROPHENOL
+DB#: JP000007
+INCHIKEY: HFZWRUODUSTPEG-UHFFFAOYSA-N
+MW: 161.963920108
+FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000007
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Oc(c1)c(Cl)cc(Cl)c1
+INCHI: InChI=1S/C6H4Cl2O/c7-4-1-2-6(9)5(8)3-4/h1-3,9H
+SMILES_2: [H]OC1=C([H])C([H])=C(Cl)C([H])=C1Cl
+EXACT_MASS: 161.96392
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335864
+MOLECULAR_FORMULA: C6H4Cl2O
+TOTAL_EXACT_MASS: 161.963920108
+NUM PEAKS: 37
+51.0        3.07
+53.0        12.34
+60.0        6.21
+61.0        19.31
+62.0        35.08
+63.0        99.99
+64.0        10.24
+66.0        2.25
+71.0        3.05
+72.0        10.59
+73.0        19.52
+74.0        8.59
+75.0        6.44
+81.0        6.82
+82.0        4.45
+83.0        2.77
+84.0        2.03
+91.0        2.34
+96.0        3.78
+97.0        31.79
+98.0        38.03
+99.0        21.59
+100.0       13.06
+101.0       4.67
+125.0       4.82
+126.0       20.32
+127.0       3.76
+128.0       7.38
+133.0       4.02
+134.0       2.72
+135.0       2.64
+161.0       19.22
+162.0       94.19
+163.0       15.34
+164.0       55.32
+165.0       5.54
+166.0       9.19
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/7.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split/one-per-file/7.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,57 @@
+NAME: 3,5-DICHLOROPHENOL
+SYNONYM: 3,5-DICHLOROPHENOL
+DB#: JP000008
+INCHIKEY: VPOMSPZBQMDLTM-UHFFFAOYSA-N
+MW: 161.963920108
+FORMULA: C6H4Cl2O
+PRECURSORMZ: 0
+ACCESSION: JP000008
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Oc(c1)cc(Cl)cc(Cl)1
+INCHI: InChI=1S/C6H4Cl2O/c7-4-1-5(8)3-6(9)2-4/h1-3,9H
+SMILES_2: [H]OC=1C([H])=C(Cl)C([H])=C(Cl)C1[H]
+EXACT_MASS: 161.96392
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210336053
+MOLECULAR_FORMULA: C6H4Cl2O
+TOTAL_EXACT_MASS: 161.963920108
+NUM PEAKS: 32
+51.0        1.24
+53.0        4.19
+60.0        3.61
+61.0        8.59
+62.0        16.38
+63.0        31.53
+64.0        4.94
+72.0        4.88
+73.0        10.01
+74.0        4.53
+75.0        3.92
+81.0        6.85
+82.0        4.37
+83.0        2.46
+97.0        7.3
+98.0        27.86
+99.0        28.43
+100.0       10.31
+101.0       9.88
+126.0       8.1
+127.0       4.51
+128.0       3.3
+133.0       4.08
+134.0       6.58
+135.0       2.96
+136.0       4.15
+161.0       3.31
+162.0       99.99
+163.0       8.57
+164.0       60.06
+165.0       4.43
+166.0       9.68
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/8.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split/one-per-file/8.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,90 @@
+NAME: 2,4,5-TRICHLOROPHENOL
+SYNONYM: 2,4,5-TRICHLOROPHENOL
+DB#: JP000009
+INCHIKEY: LHJGJYXLEPZJPM-UHFFFAOYSA-N
+MW: 195.924947756
+FORMULA: C6H3Cl3O
+PRECURSORMZ: 0
+ACCESSION: JP000009
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Oc(c1)c(Cl)cc(Cl)c(Cl)1
+INCHI: InChI=1S/C6H3Cl3O/c7-3-1-5(9)6(10)2-4(3)8/h1-2,10H
+SMILES_2: [H]OC1=C([H])C(Cl)=C(Cl)C([H])=C1Cl
+EXACT_MASS: 195.92495
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210336033
+MOLECULAR_FORMULA: C6H3Cl3O
+TOTAL_EXACT_MASS: 195.924947756
+NUM PEAKS: 65
+51.0        2.58
+53.0        14.73
+59.0        2.03
+60.0        12.75
+61.0        30.62
+62.0        36.79
+63.0        19.11
+64.0        2.15
+65.0        5.23
+66.0        13.42
+67.0        7.46
+69.0        2.46
+71.0        6.55
+72.0        13.85
+73.0        16.02
+74.0        7.55
+75.0        4.47
+79.0        2.34
+80.0        8.06
+81.0        5.21
+82.0        3.22
+83.0        7.1
+84.0        6.05
+85.0        6.38
+86.0        2.53
+87.0        3.44
+89.0        1.93
+95.0        3.8
+96.0        33.63
+97.0        67.27
+98.0        25.02
+99.0        31.7
+100.0       5.86
+106.0       2.03
+107.0       8.66
+108.0       3.94
+109.0       6.55
+131.0       12.51
+132.0       48.06
+133.0       32.0
+134.0       33.42
+135.0       18.37
+136.0       6.55
+137.0       2.96
+149.0       6.48
+151.0       3.39
+160.0       10.69
+161.0       4.76
+162.0       10.76
+163.0       3.58
+164.0       3.61
+167.0       4.06
+169.0       3.89
+177.0       4.76
+179.0       2.94
+192.0       6.69
+194.0       4.64
+195.0       6.79
+196.0       99.99
+197.0       11.45
+198.0       92.58
+199.0       7.82
+200.0       29.54
+201.0       2.08
+202.0       3.15
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/split/one-per-file/9.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split/one-per-file/9.msp Mon Feb 05 10:35:49 2024 +0000
[
@@ -0,0 +1,91 @@
+NAME: 2,4,6-TRICHLOROPHENOL
+SYNONYM: 2,4,6-TRICHLOROPHENOL
+DB#: JP000010
+INCHIKEY: LINPIYWFGCPVIE-UHFFFAOYSA-N
+MW: 195.924947756
+FORMULA: C6H3Cl3O
+PRECURSORMZ: 0
+ACCESSION: JP000010
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Clc(c1)cc(Cl)c(O)c(Cl)1
+INCHI: InChI=1S/C6H3Cl3O/c7-3-1-4(8)6(10)5(9)2-3/h1-2,10H
+SMILES_2: [H]OC=1C(Cl)=C([H])C(Cl)=C([H])C1Cl
+EXACT_MASS: 195.92495
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210336053
+MOLECULAR_FORMULA: C6H3Cl3O
+TOTAL_EXACT_MASS: 195.924947756
+NUM PEAKS: 66
+53.0        14.63
+55.0        2.49
+57.0        2.2
+60.0        12.21
+61.0        32.06
+62.0        42.22
+63.0        36.9
+64.0        4.32
+65.0        8.43
+66.0        23.0
+67.0        12.65
+68.0        2.71
+71.0        6.78
+72.0        13.68
+73.0        17.64
+74.0        8.84
+75.0        5.57
+80.0        9.94
+81.0        8.84
+82.0        4.21
+83.0        8.62
+84.0        6.16
+85.0        5.83
+87.0        3.92
+89.0        2.2
+90.0        2.89
+91.0        2.09
+95.0        4.84
+96.0        34.11
+97.0        70.76
+98.0        39.72
+99.0        38.18
+100.0       10.63
+101.0       2.64
+106.0       2.45
+107.0       9.09
+108.0       3.77
+109.0       7.22
+111.0       2.23
+125.0       3.44
+126.0       8.91
+127.0       2.05
+128.0       3.52
+131.0       18.48
+132.0       57.96
+133.0       22.12
+134.0       40.71
+135.0       10.45
+136.0       7.81
+160.0       31.84
+161.0       5.2
+162.0       50.47
+163.0       5.2
+164.0       22.81
+166.0       5.57
+167.0       4.1
+168.0       2.56
+169.0       3.63
+195.0       3.59
+196.0       99.99
+197.0       9.68
+198.0       91.34
+199.0       7.07
+200.0       28.42
+201.0       2.09
+202.0       3.04
+
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/subsetting/identifier.csv
--- a/test-data/subsetting/identifier.csv Mon Jan 15 12:28:02 2024 +0000
+++ b/test-data/subsetting/identifier.csv Mon Feb 05 10:35:49 2024 +0000
b
@@ -1,4 +1,4 @@
-COMPOUND_NAME
+NAME
 C001
 C002
 C004
\ No newline at end of file
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/subsetting/subsetting_output.msp
--- a/test-data/subsetting/subsetting_output.msp Mon Jan 15 12:28:02 2024 +0000
+++ b/test-data/subsetting/subsetting_output.msp Mon Feb 05 10:35:49 2024 +0000
b
@@ -1,9 +1,8 @@
+NAME: C001
 IONMODE: Negative
+RETENTIONTIME: 38.74
+RETENTIONINDEX: -1
 SPECTRUMTYPE: Centroid
-COMPOUND_NAME: C001
-RETENTION_TIME: 38.74
-RETENTION_INDEX: -1
-TOOL_USED: matchms
 NUM PEAKS: 57
 138.9121    10186226.0
 148.9337    1008656.0
@@ -63,12 +62,11 @@
 676.6436    1982714.0
 800.4451    2792137.0
 
+NAME: C002
 IONMODE: Negative
+RETENTIONTIME: 520.25
+RETENTIONINDEX: 1234.5
 SPECTRUMTYPE: Centroid
-COMPOUND_NAME: C002
-RETENTION_TIME: 520.25
-RETENTION_INDEX: 1234.5
-TOOL_USED: matchms
 NUM PEAKS: 35
 131.1733    1971789.0
 267.2688    6103973.0
@@ -106,11 +104,10 @@
 1216.8041   4439324.0
 1217.807    3565334.0
 
+NAME: C004
 IONMODE: Negative
+RETENTIONTIME: 473.48
 SPECTRUMTYPE: Centroid
-COMPOUND_NAME: C004
-RETENTION_TIME: 473.48
-TOOL_USED: matchms
 NUM PEAKS: 24
 124.1405    6517662.0
 170.2437    1237313.0
b
diff -r fc1bc38ede0b -r 114617e6ad33 test-data/subsetting/subsetting_output2.msp
--- a/test-data/subsetting/subsetting_output2.msp Mon Jan 15 12:28:02 2024 +0000
+++ b/test-data/subsetting/subsetting_output2.msp Mon Feb 05 10:35:49 2024 +0000
b
@@ -1,8 +1,7 @@
+NAME: C003
 IONMODE: Negative
+RETENTIONTIME: 483.67
 SPECTRUMTYPE: Centroid
-COMPOUND_NAME: C003
-RETENTION_TIME: 483.67
-TOOL_USED: matchms
 NUM PEAKS: 26
 265.2529    11366224.0
 266.2564    1420444.0
@@ -31,11 +30,10 @@
 1071.1639   15461047.0
 1072.1671   5096642.0
 
+NAME: C005
 IONMODE: Negative
+RETENTIONTIME: 41.72
 SPECTRUMTYPE: Centroid
-COMPOUND_NAME: C005
-RETENTION_TIME: 41.72
-TOOL_USED: matchms
 NUM PEAKS: 20
 218.1386    14009249.0
 337.0623    88672453.0