Previous changeset 0:e5010b19d64d (2023-06-27) |
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a |
modified:
formatter.py macros.xml matchms_filtering_wrapper.py matchms_similarity.xml test-data/similarity/scores_test6_out.json test-data/spectral_similarity/test1.json |
added:
test-data/convert/ms_lima_output.msp test-data/filtering/reduce_to_top_n_peaks.msp test-data/filtering/require_filter.msp test-data/filtering/require_inchi_out.msp test-data/filtering/require_smiles_out.msp test-data/out_matchms_add_key.msp |
b |
diff -r e5010b19d64d -r 872d8040f713 formatter.py --- a/formatter.py Tue Jun 27 14:26:29 2023 +0000 +++ b/formatter.py Thu Oct 12 13:25:30 2023 +0000 |
[ |
@@ -13,10 +13,12 @@ DataFrame: Scores DataFrame: Matches """ - dataframe = DataFrame(columns=['query', 'reference', *scores.scores.score_names]) + data = [] for i, (row, col) in enumerate(zip(scores.scores.row, scores.scores.col)): - dataframe.loc[i] = [scores.queries[col].metadata['compound_name'], scores.references[row].metadata['compound_name'], *scores.scores.data[i]] + data.append([scores.queries[col].metadata['compound_name'], scores.references[row].metadata['compound_name'], *scores.scores.data[i]]) + + dataframe = DataFrame(data, columns=['query', 'reference', *scores.scores.score_names]) return dataframe |
b |
diff -r e5010b19d64d -r 872d8040f713 macros.xml --- a/macros.xml Tue Jun 27 14:26:29 2023 +0000 +++ b/macros.xml Thu Oct 12 13:25:30 2023 +0000 |
b |
@@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">0.20.0</token> + <token name="@TOOL_VERSION@">0.22.0</token> <xml name="creator"> <creator> @@ -57,9 +57,11 @@ <xml name="input_param"> <conditional name="scores"> - <param name="use_scores" label="Use Scores Object" type="boolean" truevalue="TRUE" falsevalue="FALSE" - checked="false"/> - <when value="TRUE"> + <param name="use_scores" label="Use Scores Object" type="select"> + <option value="False" selected="true">FALSE</option> + <option value="True">TRUE</option> + </param> + <when value="True"> <param label="Scores object" name="scores_in" type="data" format="json" help="Scores objects calculated previously using one of the matchms similarity tools." /> <param label="join type" name="join_type" type="select" display="radio" @@ -70,7 +72,7 @@ <option value="outer">outer</option> </param> </when> - <when value="FALSE"> + <when value="False"> <param label="Queries spectra" name="queries" type="data" format="msp" help="Query mass spectra to match against references."/> <param label="Reference spectra" name="references" type="data" format="msp" @@ -89,7 +91,7 @@ <token name="@init_scores@"> from matchms.importing import load_from_msp, scores_from_json from matchms import Scores -#if $scores.use_scores +#if $scores.use_scores == "True" scores = scores_from_json("$scores_in") join_type = "$scores.join_type" #else |
b |
diff -r e5010b19d64d -r 872d8040f713 matchms_filtering_wrapper.py --- a/matchms_filtering_wrapper.py Tue Jun 27 14:26:29 2023 +0000 +++ b/matchms_filtering_wrapper.py Thu Oct 12 13:25:30 2023 +0000 |
b |
@@ -4,10 +4,18 @@ from matchms.exporting import save_as_mgf, save_as_msp from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\ add_retention_index, add_retention_time, clean_compound_name -from matchms.filtering import default_filters, normalize_intensities, select_by_mz, select_by_relative_intensity +from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \ + select_by_relative_intensity from matchms.importing import load_from_mgf, load_from_msp +def require_key(spectrum, key): + if spectrum.get(key): + return spectrum + + return None + + def main(argv): parser = argparse.ArgumentParser(description="Compute MSP similarity scores") parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.") @@ -27,13 +35,23 @@ help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).") parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter") parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter") + parser.add_argument("-require_smiles", action='store_true', + help="Remove spectra that does not contain SMILES.") + parser.add_argument("-require_inchi", action='store_true', + help="Remove spectra that does not contain INCHI.") + parser.add_argument("-reduce_to_top_n_peaks", action='store_true', + help="reduce to top n peaks filter.") + parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.") args = parser.parse_args() if not (args.normalise_intensities or args.default_filters or args.clean_metadata or args.relative_intensity - or args.mz_range): + or args.mz_range + or args.require_smiles + or args.require_inchi + or args.reduce_to_top_n_peaks): raise ValueError('No filter selected.') if args.spectra_format == 'msp': @@ -63,7 +81,17 @@ if args.mz_range: spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz) - filtered_spectra.append(spectrum) + if args.reduce_to_top_n_peaks: + spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max) + + if args.require_smiles and spectrum is not None: + spectrum = require_key(spectrum, "smiles") + + if args.require_inchi and spectrum is not None: + spectrum = require_key(spectrum, "inchi") + + if spectrum is not None: + filtered_spectra.append(spectrum) if args.spectra_format == 'msp': save_as_msp(filtered_spectra, args.output) |
b |
diff -r e5010b19d64d -r 872d8040f713 matchms_similarity.xml --- a/matchms_similarity.xml Tue Jun 27 14:26:29 2023 +0000 +++ b/matchms_similarity.xml Thu Oct 12 13:25:30 2023 +0000 |
[ |
@@ -1,4 +1,4 @@ -<tool id="matchms_similarity" name="matchms similarity" version="@TOOL_VERSION@+galaxy0" profile="21.09"> +<tool id="matchms_similarity" name="matchms similarity" version="@TOOL_VERSION@+galaxy2" profile="21.09"> <description>calculate the similarity score and matched peaks</description> <macros> @@ -13,21 +13,21 @@ <requirement type="package" version="@TOOL_VERSION@">matchms</requirement> </requirements> - <environment_variables> - <environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable> - </environment_variables> - <command detect_errors="exit_code"><![CDATA[ sh ${matchms_python_cli} ]]> </command> + <environment_variables> + <environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable> + </environment_variables> + <configfiles> <configfile name="matchms_python_cli"> python3 ${__tool_directory__}/matchms_similarity_wrapper.py \ - #if $ri_filtering.is_true + #if $ri_filtering.is_true == "TRUE" -r $ri_filtering.tolerance \ #end if - #if $symmetric.is_symmetric + #if $symmetric.is_symmetric == "TRUE" -s \ #else --ref "$references" \ @@ -58,12 +58,15 @@ <param label="Queries spectra" name="queries" type="data" format="msp,mgf" help="Query mass spectra to match against references."/> <conditional name="symmetric"> - <param name="is_symmetric" label="Symmetric" type="boolean" truevalue="TRUE" falsevalue="FALSE" - checked="false"/> + <param name="is_symmetric" label="Symmetric" type="select"> + <option value="FALSE" selected="true">FALSE</option> + <option value="TRUE">TRUE</option> + </param> <when value="FALSE"> <param label="Reference spectra" name="references" type="data" format="msp,mgf" help="Reference mass spectra to match against as library."/> </when> + <when value="TRUE"></when> </conditional> <param label="Scores array type" name="array_type" type="select" display="radio" help="Matrix type for storing scores objects. Sparse type more memory-efficient and better for large arrays. @@ -107,12 +110,15 @@ <conditional name="ri_filtering"> - <param name="is_true" label="Apply RI filtering" type="boolean" truevalue="TRUE" falsevalue="FALSE" - checked="false"/> + <param name="is_true" label="Apply RI filtering" type="select"> + <option value="FALSE" selected="true">FALSE</option> + <option value="TRUE">TRUE</option> + </param> <when value="TRUE"> <param label="tolerance" name="tolerance" type="float" value="60" help="Peaks will be considered a match when less than tolerance apart."/> </when> + <when value="FALSE"></when> </conditional> </inputs> @@ -141,7 +147,7 @@ <param name="references" value="similarity/fill.mgf" ftype="mgf"/> <param name="queries" value="similarity/fill2.msp" ftype="msp"/> <conditional name="ri_filtering"> - <param name="is_true" value="True"></param> + <param name="is_true" value="TRUE"></param> <param name="tolerance" value="60.0" /> </conditional> <conditional name="metric"> @@ -164,7 +170,7 @@ </conditional> <param name="is_symmetric" value="TRUE" /> <conditional name="ri_filtering"> - <param name="is_true" value="True"></param> + <param name="is_true" value="TRUE"></param> <param name="tolerance" value="60.0" /> </conditional> <output name="similarity_scores" file="similarity/scores_test5_out.json" ftype="json"/> @@ -178,7 +184,7 @@ <param name="model_weights" value="similarity/spec2vec/weights_100.binary" ftype="auto"/> <param name="allow_missing_percentage" value="1.0"/> </conditional> - <output name="similarity_scores" file="similarity/scores_test6_out.json" ftype="json" compare="sim_size" delta="100000"/> + <output name="similarity_scores" file="similarity/scores_test6_out.json" ftype="json" compare="sim_size" delta="1000"/> </test> </tests> |
b |
diff -r e5010b19d64d -r 872d8040f713 test-data/convert/ms_lima_output.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/convert/ms_lima_output.msp Thu Oct 12 13:25:30 2023 +0000 |
[ |
b'@@ -0,0 +1,4848 @@\n+FORMULA: C4H10NO3PS\n+INCHIKEY: YASYVMFAVPKPKE-SECBINFHSA-N\n+SMILES: COP(=O)(N=C(O)C)SC\n+NAME: Acephate\n+RETENTIONTIME: 1.232997\n+PRECURSORMZ: 184.0194\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 16\n+90.09368 1128.0\n+93.11512 1241.0\n+95.10279 1118.0\n+101.31465 1152.0\n+102.90688 1322.0\n+103.98039 1201.0\n+112.01607 12289.0\n+112.99994 38027.0\n+115.00399 1634.0\n+124.98121 922.0\n+128.97701 9208.0\n+132.57193 1350.0\n+135.84808 1428.0\n+142.99275 16419.0\n+147.94205 1750.0\n+173.5094 2353.0\n+\n+FORMULA: C12H11NO2\n+INCHIKEY: CVXBEEMKQHEXEN-UHFFFAOYSA-N\n+SMILES: CN=C(Oc1cccc2c1cccc2)O\n+NAME: Carbaryl\n+RETENTIONTIME: 5.259445\n+PRECURSORMZ: 202.0863\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 1\n+145.06491 1326147.0\n+\n+FORMULA: C8H16NO5P\n+INCHIKEY: VEENJGZXVHKXNB-UHFFFAOYSA-N\n+SMILES: COP(=O)(OC(=CC(=O)N(C)C)C)OC\n+NAME: Dicrotophos\n+RETENTIONTIME: 2.025499\n+PRECURSORMZ: 238.0844\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 5\n+112.074 102027.0\n+112.07591 9070987.0\n+127.01563 3230337.0\n+193.02605 7897744.0\n+238.08437 2973124.0\n+\n+FORMULA: C5H12NO3PS2\n+INCHIKEY: MCWXGJITAZMZEV-UHFFFAOYSA-N\n+SMILES: CN=C(CSP(=S)(OC)OC)O\n+NAME: Dimethoate\n+RETENTIONTIME: 2.866696\n+PRECURSORMZ: 230.0072\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 8\n+88.0219 548446.0\n+124.98233 183861.0\n+142.99275 722053.0\n+156.95422 80792.0\n+170.97 1426256.0\n+197.98123 240915.0\n+198.96501 5415933.0\n+230.00722 497851.0\n+\n+FORMULA: C21H22NO4Cl\n+INCHIKEY: QNBTYORWCCMPQP-UHFFFAOYSA-N\n+SMILES: COc1cc(ccc1OC)C(=CC(=O)N1CCOCC1)c1ccc(cc1)Cl\n+NAME: Dimethomorph\n+RETENTIONTIME: 7.060486\n+PRECURSORMZ: 388.1316\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 22\n+114.05532 468862.0\n+125.01571 886745.0\n+138.99484 4138370.0\n+155.0705 425164.0\n+165.05519 15513399.0\n+165.06543 350695.0\n+195.08057 386226.0\n+215.0262 490061.0\n+223.07544 702025.0\n+227.02576 230514.0\n+229.04225 216308.0\n+235.07555 241142.0\n+238.09914 1323577.0\n+242.04929 2449236.0\n+243.02142 891584.0\n+257.03726 578874.0\n+258.04443 3232295.0\n+266.0943 358273.0\n+270.04492 608851.0\n+273.06772 3866006.0\n+286.03912 483547.0\n+301.06311 4060551.0\n+\n+FORMULA: C2H8NO2PS\n+INCHIKEY: NNKVPIKMPCQWCG-ZCFIWIBFSA-N\n+SMILES: COP(=O)(SC)N\n+NAME: Methamidophos\n+RETENTIONTIME: 1.153307\n+PRECURSORMZ: 142.0089\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 4\n+98.00042 37721.0\n+109.98272 71172.0\n+112.01607 2867923.0\n+127.99321 75837.0\n+\n+FORMULA: C7H13O6P\n+INCHIKEY: GEPDYQSQVLXLEU-UHFFFAOYSA-N\n+SMILES: COC(=O)C=C(OP(=O)(OC)OC)C\n+NAME: Mevinphos\n+RETENTIONTIME: 2.876307\n+PRECURSORMZ: 225.0525\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 4\n+99.04416 295529.0\n+127.01563 1960973.0\n+193.02605 1150190.0\n+225.05209 101872.0\n+\n+FORMULA: C5H12NO4PS\n+INCHIKEY: PZXOQEXFMJCDPG-UHFFFAOYSA-N\n+SMILES: CN=C(CSP(=O)(OC)OC)O\n+NAME: Omethoate\n+RETENTIONTIME: 1.33423\n+PRECURSORMZ: 214.0303\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 5\n+104.01654 86844.0\n+124.98233 194375.0\n+127.01563 4696021.0\n+128.97701 47970.0\n+142.99275 4310988.0\n+\n+FORMULA: C16H20O6P2S3\n+INCHIKEY: WWJZWCUNLNYYAU-UHFFFAOYSA-N\n+SMILES: COP(=S)(Oc1ccc(cc1)Sc1ccc(cc1)OP(=S)(OC)OC)OC\n+NAME: Temephos\n+RETENTIONTIME: 7.736881\n+PRECURSORMZ: 466.9978\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 44\n+124.98233 218400.0\n+125.00596 124192.0\n+127.01563 590561.0\n+139.02167 79978.0\n+139.05467 105470.0\n+140.95975 428071.0\n+142.99275 7482486.0\n+154.99849 619650.0\n+157.00861 365474.0\n+171.02641 502869.0\n+172.03448 151150.0\n+183.02695 176056.0\n+184.03453 206568.0\n+187.02121 240339.0\n+199.02151 245544.0\n+200.02902 385101.0\n+201.03729 198527.0\n+211.03268 88063.0\n+215.01689 538632.0\n+217.03214 259530.0\n+218.98798 87371.0\n+219.02972 94609.0\n+230.99336 108101.0\n+232.03233 244260.0\n+233.00958 88058.0\n+247.02538 224924.0\n+248.03291 127038.0\n+261.98486 132283.0\n+262.99268 185876.0\n+264.00052 186556.0\n+278.98856 20'..b'1 1076938.0\n+214.04874 274804.0\n+218.05339 138241.0\n+223.04858 173264.0\n+225.06474 1428863.0\n+238.05968 4320120.0\n+239.08025 155000.0\n+247.08057 319312.0\n+253.14586 318558.0\n+255.08643 272181.0\n+267.08603 1563035.0\n+270.13541 250539.0\n+281.12677 392614.0\n+295.14307 440522.0\n+298.08517 500719.0\n+298.15424 170519.0\n+299.09323 317366.0\n+307.14276 192450.0\n+323.14941 13337730.0\n+328.07541 186287.0\n+334.15344 701456.0\n+348.08228 404641.0\n+353.0658 262110.0\n+366.07346 193709.0\n+368.08932 5815862.0\n+495.20059 2433116.0\n+\n+FORMULA: C11H16N2O2\n+INCHIKEY: IMIDOCRTMDIQIJ-UHFFFAOYSA-N\n+SMILES: CN=C(Oc1ccc(c(c1)C)N(C)C)O\n+NAME: Aminocarb_1\n+RETENTIONTIME: 0.8035756\n+PRECURSORMZ: 209.129\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 5\n+120.05733 176701.0\n+122.06016 1917070.0\n+136.07611 928093.0\n+137.08363 8823033.0\n+152.10725 186336.0\n+\n+FORMULA: C11H16N2O2\n+INCHIKEY: IMIDOCRTMDIQIJ-UHFFFAOYSA-N\n+SMILES: CN=C(Oc1ccc(c(c1)C)N(C)C)O\n+NAME: Aminocarb_2\n+RETENTIONTIME: 1.13997\n+PRECURSORMZ: 209.129\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 5\n+120.05733 247123.0\n+122.06016 2666029.0\n+136.07611 1253139.0\n+137.08363 12201258.0\n+152.10725 242082.0\n+\n+FORMULA: C9H20N2O2\n+INCHIKEY: WZZLDXDUQPOXNW-UHFFFAOYSA-N\n+SMILES: CCCOC(=NCCCN(C)C)O\n+NAME: Propamocarb_1\n+RETENTIONTIME: 0.7535679\n+PRECURSORMZ: 189.1603\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 2\n+86.0966 201548.0\n+102.05516 5038638.0\n+\n+FORMULA: C9H20N2O2\n+INCHIKEY: WZZLDXDUQPOXNW-UHFFFAOYSA-N\n+SMILES: CCCOC(=NCCCN(C)C)O\n+NAME: Propamocarb_2\n+RETENTIONTIME: 1.081971\n+PRECURSORMZ: 189.1603\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 2\n+86.0966 107829.0\n+102.05516 2507023.0\n+\n+FORMULA: C11H15N3O2\n+INCHIKEY: MYPKGPZHHQEODQ-UHFFFAOYSA-N\n+SMILES: CN=C(Oc1cccc(c1)N=CN(C)C)O\n+NAME: Formetanate_1\n+RETENTIONTIME: 0.7730471\n+PRECURSORMZ: 222.1239\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 13\n+93.03365 1796.0\n+107.04935 1981.0\n+111.04435 82262.0\n+118.04142 1927.0\n+120.04462 150907.0\n+121.03984 67610.0\n+122.06016 5909.0\n+122.75254 1678.0\n+150.98424 1930.0\n+165.1024 143887.0\n+173.50876 2616.0\n+200.05632 2056.0\n+208.52768 2170.0\n+\n+FORMULA: C11H15N3O2\n+INCHIKEY: MYPKGPZHHQEODQ-UHFFFAOYSA-N\n+SMILES: CN=C(Oc1cccc(c1)N=CN(C)C)O\n+NAME: Formetanate_2\n+RETENTIONTIME: 1.13043\n+PRECURSORMZ: 222.1239\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 15\n+91.05441 6330.0\n+93.03365 27201.0\n+107.04935 4024.0\n+111.04435 131558.0\n+115.05429 3711.0\n+117.06996 5571.0\n+118.04177 4476.0\n+120.04462 274740.0\n+121.03984 113412.0\n+122.06016 7843.0\n+124.07605 4049.0\n+135.04427 4178.0\n+145.06488 3067.0\n+164.95049 3848.0\n+165.1024 263802.0\n+\n+FORMULA: C12H18N2O2\n+INCHIKEY: YNEVBPNZHBAYOA-UHFFFAOYSA-N\n+SMILES: CN=C(Oc1cc(C)c(c(c1)C)N(C)C)O\n+NAME: Mexacarbate\n+RETENTIONTIME: 1.682191\n+PRECURSORMZ: 223.1443\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 5\n+134.07283 2632951.0\n+136.07611 26036728.0\n+150.092 1572118.0\n+151.09932 54847764.0\n+166.12282 1541928.0\n+\n+FORMULA: C19H21N2OCl\n+INCHIKEY: OGYFATSSENRIKG-UHFFFAOYSA-N\n+SMILES: Clc1ccc(cc1)CN(C(=Nc1ccccc1)O)C1CCCC1\n+NAME: Monceren\n+RETENTIONTIME: 7.14553\n+PRECURSORMZ: 329.1426\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 5\n+89.03881 550831.0\n+94.06543 635265.0\n+106.06545 446416.0\n+125.01307 512150.0\n+125.01532 37442116.0\n+\n+FORMULA: C16H16N2O4\n+INCHIKEY: WZJZMXBKUWKXTQ-UHFFFAOYSA-N\n+SMILES: CCOC(=Nc1cccc(c1)OC(=Nc1ccccc1)O)O\n+NAME: Desmedipham\n+RETENTIONTIME: 6.430396\n+PRECURSORMZ: 301.1192\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 3\n+136.03947 1773399.0\n+154.04993 1002798.0\n+182.08162 6480130.0\n+\n+FORMULA: C16H16N2O4\n+INCHIKEY: IDOWTHOLJBTAFI-UHFFFAOYSA-N\n+SMILES: COC(=Nc1cccc(c1)OC(=Nc1cccc(c1)C)O)O\n+NAME: Phenmedipham\n+RETENTIONTIME: 6.570995\n+PRECURSORMZ: 301.1185\n+PRECURSORTYPE: [M+H]+\n+INSTRUMENTTYPE: LC-ESI-Orbitrap\n+NUM PEAKS: 2\n+136.03947 2596929.0\n+168.06587 7038054.0\n+\n' |
b |
diff -r e5010b19d64d -r 872d8040f713 test-data/filtering/reduce_to_top_n_peaks.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtering/reduce_to_top_n_peaks.msp Thu Oct 12 13:25:30 2023 +0000 |
b |
@@ -0,0 +1,57 @@ +IONMODE: negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C001 +RETENTION_TIME: 38.74 +RETENTION_INDEX: None +NUM PEAKS: 5 +175.0641 26780143.0 +206.9034 26130980.0 +216.9205 32607700.0 +254.8252 23747536.0 +256.8215 31377637.0 + +IONMODE: negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C002 +RETENTION_TIME: 520.25 +RETENTION_INDEX: 1234.5 +NUM PEAKS: 5 +310.1623 295359836.0 +525.375 1073323842.0 +526.3783 181668883.0 +551.3321 111616808.0 +1047.7378 150394804.0 + +IONMODE: negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C003 +RETENTION_TIME: 483.67 +NUM PEAKS: 5 +288.6414 202172046.0 +522.3565 4089569222.0 +523.354 1201714423.0 +1043.7028 144351468.0 +1044.7068 83271854.0 + +IONMODE: negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C004 +RETENTION_TIME: 473.48 +NUM PEAKS: 5 +496.34 12577588056.0 +497.3442 3337125302.0 +498.3462 532285213.0 +991.6726 1420557258.0 +992.6749 763118028.0 + +IONMODE: negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C005 +RETENTION_TIME: 41.72 +NUM PEAKS: 5 +218.1386 14009249.0 +337.0623 88672453.0 +353.0361 37061354.0 +359.0443 48435582.0 +375.018 29159485.0 + |
b |
diff -r e5010b19d64d -r 872d8040f713 test-data/filtering/require_filter.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtering/require_filter.msp Thu Oct 12 13:25:30 2023 +0000 |
[ |
@@ -0,0 +1,72 @@ +FORMULA: C13H9ClFeO4Si +CASNO: 2000570-99-8 +ID: 2011 +COMMENT: SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not real! +COMPOUND_NAME: ((.eta.5-Cyclopentadienylironbiscarbonyl)(1,2-phenylenedioxysilyl)chloride complex +PARENT_MASS: 347.930801 +PUBCHEMID: 10970124 +NOMINAL_MASS: 348 +SMILES: Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe] +NUM PEAKS: 3 +292.0 999.0 +314.0 118.89 +348.0 734.24 + +FORMULA: C13H14O +CASNO: 2000130-22-2 +ID: 7198 +COMMENT: SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588| +COMPOUND_NAME: ((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol +PARENT_MASS: 186.1044655 +RETENTION_INDEX: 1588.0 +PUBCHEMID: 130762197 +NOMINAL_MASS: 186 +INCHI: InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1 +NUM PEAKS: 20 +51.0 89.92 +63.0 89.92 +77.0 179.84 +88.0 39.96 +89.0 59.95 +91.0 49.95 +102.0 149.86 +113.0 49.95 +115.0 229.79 +127.0 139.87 +128.0 999.0 +129.0 199.82 +144.0 99.91 +155.0 119.89 +156.0 14.89 +157.0 1.1 +158.0 0.1 +186.0 39.96 +187.0 5.89 +188.0 0.5 + +FORMULA: C34H54O4 +CASNO: 2000774-54-3 +ID: 36905 +COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| +COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate +PARENT_MASS: 526.40221 +RETENTION_INDEX: 3353.0 +PUBCHEMID: 236415 +NOMINAL_MASS: 526 +INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +NUM PEAKS: 14 +189.0 419.62 +203.0 249.77 +216.0 149.86 +262.0 79.93 +276.0 49.95 +393.0 149.86 +423.0 219.8 +453.0 179.84 +466.0 999.0 +526.0 179.84 +527.0 68.94 +528.0 14.29 +529.0 2.1 +530.0 0.2 \ No newline at end of file |
b |
diff -r e5010b19d64d -r 872d8040f713 test-data/filtering/require_inchi_out.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtering/require_inchi_out.msp Thu Oct 12 13:25:30 2023 +0000 |
[ |
@@ -0,0 +1,59 @@ +FORMULA: C13H14O +CASNO: 2000130-22-2 +ID: 7198 +COMMENT: SpectrumID: 1752764; Source: A1-13-956/SMS7-13; DOI: 10.1021/ol1029996; QI: 383; Class: Benzene and substituted derivatives; CASRN not real! |RI:1588| +COMPOUND_NAME: ((1R*,2R*)-1-Methyl-2-phenylethynylcyclopropyl)methanol +PARENT_MASS: 186.1044655 +RETENTION_INDEX: 1588.0 +PUBCHEMID: 130762197 +NOMINAL_MASS: 186 +INCHI: InChI=1S/C13H14O/c1-13(10-14)9-12(13)8-7-11-5-3-2-4-6-11/h2-6,12,14H,9-10H2,1H3/t12-,13-/m0/s1 +NUM PEAKS: 20 +51.0 89.92 +63.0 89.92 +77.0 179.84 +88.0 39.96 +89.0 59.95 +91.0 49.95 +102.0 149.86 +113.0 49.95 +115.0 229.79 +127.0 139.87 +128.0 999.0 +129.0 199.82 +144.0 99.91 +155.0 119.89 +156.0 14.89 +157.0 1.1 +158.0 0.1 +186.0 39.96 +187.0 5.89 +188.0 0.5 + +FORMULA: C34H54O4 +CASNO: 2000774-54-3 +ID: 36905 +COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| +COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate +PARENT_MASS: 526.40221 +RETENTION_INDEX: 3353.0 +PUBCHEMID: 236415 +NOMINAL_MASS: 526 +INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +NUM PEAKS: 14 +189.0 419.62 +203.0 249.77 +216.0 149.86 +262.0 79.93 +276.0 49.95 +393.0 149.86 +423.0 219.8 +453.0 179.84 +466.0 999.0 +526.0 179.84 +527.0 68.94 +528.0 14.29 +529.0 2.1 +530.0 0.2 + |
b |
diff -r e5010b19d64d -r 872d8040f713 test-data/filtering/require_smiles_out.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtering/require_smiles_out.msp Thu Oct 12 13:25:30 2023 +0000 |
[ |
@@ -0,0 +1,41 @@ +FORMULA: C13H9ClFeO4Si +CASNO: 2000570-99-8 +ID: 2011 +COMMENT: SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not real! +COMPOUND_NAME: ((.eta.5-Cyclopentadienylironbiscarbonyl)(1,2-phenylenedioxysilyl)chloride complex +PARENT_MASS: 347.930801 +PUBCHEMID: 10970124 +NOMINAL_MASS: 348 +SMILES: Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe] +NUM PEAKS: 3 +292.0 999.0 +314.0 118.89 +348.0 734.24 + +FORMULA: C34H54O4 +CASNO: 2000774-54-3 +ID: 36905 +COMMENT: SpectrumID: 1800193; Source: PA-7-239-4(DIP); DOI: 10.1002_(SICI)1099-1565(199605)7_3_136; Class: Triterpenoids; CASRN not real! |RI:3353| +COMPOUND_NAME: ((1R,3aS,5aR,5bR,7aR,9S,11aR,11bR,13aR,13bR)-9-acetoxy-5a,5b,8,8,11a-pentamethyl-1-(prop-1-en-2-yl)icosahydro-1H-cyclopenta[a]chrysen-3a-yl)methyl acetate +PARENT_MASS: 526.40221 +RETENTION_INDEX: 3353.0 +PUBCHEMID: 236415 +NOMINAL_MASS: 526 +INCHI: InChI=1S/C34H54O4/c1-21(2)24-12-17-34(20-37-22(3)35)19-18-32(8)25(29(24)34)10-11-27-31(7)15-14-28(38-23(4)36)30(5,6)26(31)13-16-33(27,32)9/h24-29H,1,10-20H2,2-9H3/t24-,25+,26-,27+,28-,29+,31-,32+,33+,34+/m0/s1 +SMILES: C=C(C)[C@@H]1CC[C@]2(COC(C)=O)CC[C@]3(C)[C@H](CC[C@@H]4[C@@]5(C)CC[C@H](OC(C)=O)C(C)(C)[C@@H]5CC[C@]43C)[C@@H]12 +NUM PEAKS: 14 +189.0 419.62 +203.0 249.77 +216.0 149.86 +262.0 79.93 +276.0 49.95 +393.0 149.86 +423.0 219.8 +453.0 179.84 +466.0 999.0 +526.0 179.84 +527.0 68.94 +528.0 14.29 +529.0 2.1 +530.0 0.2 + |
b |
diff -r e5010b19d64d -r 872d8040f713 test-data/out_matchms_add_key.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_matchms_add_key.msp Thu Oct 12 13:25:30 2023 +0000 |
b |
@@ -0,0 +1,199 @@ +IONMODE: Negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C001 +RETENTION_TIME: 38.74 +RETENTION_INDEX: -1 +TOOL_USED: matchms +NUM PEAKS: 57 +138.9121 10186226.0 +148.9337 1008656.0 +175.0641 26780143.0 +186.1095 2675456.0 +196.8658 21390430.0 +198.8647 21688594.0 +200.8848 7742528.0 +206.9034 26130980.0 +216.9205 32607700.0 +234.0134 2550129.0 +254.8252 23747536.0 +256.8215 31377637.0 +258.8237 15532799.0 +266.8652 9805546.0 +268.8537 3090354.0 +306.9914 3169316.0 +312.7841 10051801.0 +316.7777 10734168.0 +322.8157 6317648.0 +324.9549 8619910.0 +334.849 4178412.0 +342.8093 3285552.0 +349.9455 2050695.0 +350.9875 6150799.0 +351.941 1965882.0 +366.8281 3253770.0 +370.7418 9765463.0 +372.7383 19374863.0 +382.8218 12815572.0 +384.8177 8311500.0 +392.7685 10913351.0 +413.2664 3965867.0 +426.7772 5431633.0 +428.7834 8554675.0 +434.7287 9943329.0 +436.8161 3705247.0 +440.7322 10603010.0 +442.7401 8271752.0 +450.7016 8762673.0 +460.7076 4528973.0 +462.7862 2123666.0 +484.7242 4273989.0 +486.7743 4886062.0 +488.6825 12267966.0 +492.744 7662344.0 +494.8953 7188793.0 +498.8794 6811405.0 +500.8484 6520691.0 +502.7832 3567833.0 +510.763 4989757.0 +518.7415 4243468.0 +546.6093 7177067.0 +550.6949 6104789.0 +566.5977 5171811.0 +612.6927 2005587.0 +676.6436 1982714.0 +800.4451 2792137.0 + +IONMODE: Negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C002 +RETENTION_TIME: 520.25 +RETENTION_INDEX: 1234.5 +TOOL_USED: matchms +NUM PEAKS: 35 +131.1733 1971789.0 +267.2688 6103973.0 +279.0196 1946255.0 +289.6491 46498377.0 +301.1565 15185412.0 +309.1649 18045974.0 +310.1623 295359836.0 +311.1658 13124727.0 +312.0296 38757284.0 +330.6757 12666597.0 +525.375 1073323842.0 +526.3783 181668883.0 +527.3812 23642795.0 +551.3321 111616808.0 +552.3348 28340614.0 +553.3314 2609936.0 +562.3269 7538206.0 +578.2905 7578406.0 +619.3008 4742103.0 +624.296 11790213.0 +813.5403 25060147.0 +814.5336 5865975.0 +955.1171 2322927.0 +1047.7378 150394804.0 +1048.7399 90978863.0 +1049.7432 29946438.0 +1050.7453 6807767.0 +1069.7158 5074652.0 +1074.1979 3402288.0 +1075.1968 33352763.0 +1076.2004 10417953.0 +1101.6535 2023916.0 +1206.3127 3738816.0 +1216.8041 4439324.0 +1217.807 3565334.0 + +IONMODE: Negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C003 +RETENTION_TIME: 483.67 +TOOL_USED: matchms +NUM PEAKS: 26 +265.2529 11366224.0 +266.2564 1420444.0 +279.6362 29849749.0 +280.6546 8848921.0 +288.6414 202172046.0 +378.2093 15309961.0 +379.1966 2902366.0 +522.3565 4089569222.0 +523.354 1201714423.0 +549.3267 63300808.0 +576.2749 7386007.0 +577.3074 2354251.0 +617.2778 2323470.0 +625.4543 4040374.0 +796.9808 13576738.0 +797.9841 6368973.0 +809.9883 12596682.0 +810.9916 6601055.0 +1043.7028 144351468.0 +1044.7068 83271854.0 +1045.706 27998321.0 +1046.7131 6505178.0 +1058.1594 20718345.0 +1059.1626 6608764.0 +1071.1639 15461047.0 +1072.1671 5096642.0 + +IONMODE: Negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C004 +RETENTION_TIME: 473.48 +TOOL_USED: matchms +NUM PEAKS: 24 +124.1405 6517662.0 +170.2437 1237313.0 +275.6336 28001849.0 +296.147 190395687.0 +482.3247 145772322.0 +483.3283 36245876.0 +496.34 12577588056.0 +497.3442 3337125302.0 +498.3462 532285213.0 +499.3493 68176083.0 +770.964 49250157.0 +771.9675 22666873.0 +783.9721 9839299.0 +784.9749 3622908.0 +949.6233 8009033.0 +950.6274 3674694.0 +991.6726 1420557258.0 +992.6749 763118028.0 +993.6787 239161906.0 +994.6801 53549573.0 +1017.6897 168186952.0 +1018.6656 120599518.0 +1019.6555 57647644.0 +1020.6591 12469103.0 + +IONMODE: Negative +SPECTRUMTYPE: Centroid +COMPOUND_NAME: C005 +RETENTION_TIME: 41.72 +TOOL_USED: matchms +NUM PEAKS: 20 +218.1386 14009249.0 +337.0623 88672453.0 +338.0654 8770055.0 +353.0361 37061354.0 +359.0443 48435582.0 +360.0459 5025128.0 +375.018 29159485.0 +376.0216 2740193.0 +381.0261 13522755.0 +396.9999 10317665.0 +417.0027 13822994.0 +418.9966 4386311.0 +432.9764 9779399.0 +438.9851 11307111.0 +440.9796 3364168.0 +454.9592 9820452.0 +456.9603 3774845.0 +470.9263 3632486.0 +512.8989 4072570.0 +572.871 3485486.0 + |
b |
diff -r e5010b19d64d -r 872d8040f713 test-data/similarity/scores_test6_out.json --- a/test-data/similarity/scores_test6_out.json Tue Jun 27 14:26:29 2023 +0000 +++ b/test-data/similarity/scores_test6_out.json Thu Oct 12 13:25:30 2023 +0000 |
[ |
b'@@ -1,1 +1,1 @@\n-{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C20H12", "inchikey": "CSHWQDPOILHKBI-UHFFFAOYSA-N", "smiles": "C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Perylene", "retention_time": null, "retention_index": 2886.9, "precursor_mz": 252.09323, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "251.08595400000002", "peak_comments": {"252.09323": "Theoretical m/z 252.093354, Mass diff 0 (0.49 ppm), SMILES C1=CC=2C=CC=C3C4=CC=CC5=CC=CC(C(=C1)C23)=C54, Annotation [C20H12]+, Rule of HR False"}, "num_peaks": "3", "peaks_json": [[250.07765, 0.3282529462971431], [252.09323, 1.0], [253.09656, 0.20573802940517583]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "YNPNZTXNASCQKK-UHFFFAOYSA-N", "smiles": "C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Phenanthrene", "retention_time": null, "retention_index": 1832.9, "precursor_mz": 178.0775, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "177.070224", "peak_comments": {"176.062": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06982": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.0775": "Theoretical m/z 178.077698, Mass diff 0 (1.11 ppm), SMILES C=1C=CC2=C(C1)C=CC=3C=CC=CC32, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "5", "peaks_json": [[152.0619, 0.1657993569424221], [176.062, 0.24558560966311757], [177.06982, 0.12764433529926775], [178.0775, 1.0], [179.08078, 0.16394988149600653]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C14H10", "inchikey": "MWPLVEDNUUSJAV-UHFFFAOYSA-N", "smiles": "C1=CC2=CC3=C(C=CC=C3)C=C2C=C1", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Anthracene", "retention_time": null, "retention_index": 1844.4, "precursor_mz": 178.07754, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "177.070264", "peak_comments": {"176.06204": "Theoretical m/z 176.0626, Mass diff 0 (0 ppm), Formula C14H8", "177.06984": "Theoretical m/z 177.070425, Mass diff 0 (0 ppm), Formula C14H9", "178.07754": "Theoretical m/z 178.077698, Mass diff 0 (0.89 ppm), SMILES C=1C=CC=2C=C3C=CC=CC3=CC2C1, Annotation [C14H10]+, Rule of HR False"}, "num_peaks": "5", "peaks_json": [[152.06195, 0.12450313104470498], [176.06204, 0.23295403420236208], [177.06984, 0.1074344883724439], [178.07754, 1.0], [179.08081, 0.1616741186784917]]}, {"scannumber": "-1", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C12H10", "inchikey": "CWRYPZZKDGJXCA-UHFFFAOYSA-N", "smiles": "C1CC2=C3C1=CC=CC3=CC=C2", "authors": "Price et al., RECETOX, Masaryk University (CZ)", "instrument": "Q Exactive GC Orbitrap GC-MS/MS", "ionization": "EI+", "license": "CC BY-NC", "compound_name": "Acenaphthene", "retention_time": null, "retention_index": 1528.3, "precursor_mz": 154.07741, "adduct": "[M]+", "collision_energy": "70eV", "instrument_type": "GC-EI-Orbitrap", "charge": 1, "parent_mass": "153.070134", "peak_comments": {"151.05418": "Theoretical m/z 151.054775, Mass diff 0 (0 ppm), Formula C12H7", "153.06969": "Theoretical m/z 153.070425, Mass diff 0 (0 ppm), Formula C12H9", "154.07741": "Theoretical m/z 154.077698, Mass diff 0 (1.87 ppm), SMILES C=1C=C2C=CC=C3C2=C(C1)CC3, Annotation [C12H10]+, Rule of HR False"}, "num_peaks":'..b'N], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN], [NaN]], "dtype": [["Spec2Vec", "<f8"]]}\n\\ No newline at end of file\n' |
b |
diff -r e5010b19d64d -r 872d8040f713 test-data/spectral_similarity/test1.json --- a/test-data/spectral_similarity/test1.json Tue Jun 27 14:26:29 2023 +0000 +++ b/test-data/spectral_similarity/test1.json Thu Oct 12 13:25:30 2023 +0000 |
[ |
b'@@ -1,1 +1,1 @@\n-{"__Scores__": true, "is_symmetric": false, "references": [{"scannumber": "1161", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C4H10NO3PS", "inchikey": "YASYVMFAVPKPKE-SECBINFHSA-N", "smiles": "COP(=O)(N=C(O)C)SC", "authors": "Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)", "instrument": "LC Orbitrap Fusion Tribrid MS", "ionization": "ESI+", "license": "CC BY-NC", "num_peaks": "16", "compound_name": "Acephate", "retention_time": 1.232997, "precursor_mz": 184.0194, "adduct": "[M+H]+", "instrument_type": "LC-ESI-Orbitrap", "peaks_json": [[90.09368, 1128.0], [93.11512, 1241.0], [95.10279, 1118.0], [101.31465, 1152.0], [102.90688, 1322.0], [103.98039, 1201.0], [112.01607, 12289.0], [112.99994, 38027.0], [115.00399, 1634.0], [124.98121, 922.0], [128.97701, 9208.0], [132.57193, 1350.0], [135.84808, 1428.0], [142.99275, 16419.0], [147.94205, 1750.0], [173.5094, 2353.0]]}, {"scannumber": "2257", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C12H11NO2", "inchikey": "CVXBEEMKQHEXEN-UHFFFAOYSA-N", "smiles": "CN=C(Oc1cccc2c1cccc2)O", "authors": "Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)", "instrument": "LC Orbitrap Fusion Tribrid MS", "ionization": "ESI+", "license": "CC BY-NC", "peak_comments": {"145.06491": "Theoretical m/z 145.064787, Mass diff 0 (0.85 ppm), SMILES OC1=CC=CC=2C=CC=CC12, Annotation [C10H8O+H]+, Rule of HR True"}, "num_peaks": "1", "compound_name": "Carbaryl", "retention_time": 5.259445, "precursor_mz": 202.0863, "adduct": "[M+H]+", "instrument_type": "LC-ESI-Orbitrap", "peaks_json": [[145.06491, 1326147.0]]}, {"scannumber": "1516", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C8H16NO5P", "inchikey": "VEENJGZXVHKXNB-UHFFFAOYSA-N", "smiles": "COP(=O)(OC(=CC(=O)N(C)C)C)OC", "authors": "Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)", "instrument": "LC Orbitrap Fusion Tribrid MS", "ionization": "ESI+", "license": "CC BY-NC", "peak_comments": {"112.07591": "Theoretical m/z 112.075687, Mass diff 0 (1.99 ppm), SMILES O=C(C=CC)N(C)C, Annotation [C6H11NO-H]+, Rule of HR True", "127.01563": "Theoretical m/z 127.01547, Mass diff 0 (1.26 ppm), SMILES O=P(O)(OC)OC, Annotation [C2H7O4P+H]+, Rule of HR True", "193.02605": "Theoretical m/z 193.026035, Mass diff 0 (0.08 ppm), SMILES O=CC=C(OP(=O)(OC)OC)C, Annotation [C6H11O5P-H]+, Rule of HR True", "238.08437": "Theoretical m/z 238.083891, Mass diff 0 (2.01 ppm), SMILES O=C(C=C(OP(=O)(OC)OC)C)N(C)C, Annotation [C8H16NO5P+H]+, Rule of HR True"}, "num_peaks": "5", "compound_name": "Dicrotophos", "retention_time": 2.025499, "precursor_mz": 238.0844, "adduct": "[M+H]+", "instrument_type": "LC-ESI-Orbitrap", "peaks_json": [[112.074, 102027.0], [112.07591, 9070987.0], [127.01563, 3230337.0], [193.02605, 7897744.0], [238.08437, 2973124.0]]}, {"scannumber": "1865", "ionmode": "positive", "spectrumtype": "Centroid", "formula": "C5H12NO3PS2", "inchikey": "MCWXGJITAZMZEV-UHFFFAOYSA-N", "smiles": "CN=C(CSP(=S)(OC)OC)O", "authors": "Biomarker Analytical Laboratories, RECETOX, Masaryk University (CZ)", "instrument": "LC Orbitrap Fusion Tribrid MS", "ionization": "ESI+", "license": "CC BY-NC", "peak_comments": {"88.0219": "Theoretical m/z 88.021549, Mass diff 0 (3.99 ppm), SMILES SCC=NC, Annotation [C3H7NS-H]+, Rule of HR True", "124.98233": "Theoretical m/z 124.982067, Mass diff 0 (2.11 ppm), SMILES S=P(OC)OC, Annotation [C2H7O2PS-H]+, Rule of HR True", "142.99275": "Theoretical m/z 142.993177, Mass diff 0 (0 ppm), Formula C2H8O3PS", "156.95422": "Theoretical m/z 156.954136, Mass diff 0 (0.54 ppm), SMILES S=P(S)(OC)OC, Annotation [C2H7O2PS2-H]+, Rule of HR True", "170.97": "Theoretical m/z 170.969791, Mass diff 0 (1.22 ppm), SMILES S=P(OC)(OC)SC, Annotation [C3H9O2PS2-H]+, Rule of HR True", "197.98123": "Theoretical m/z 197.980686, Mass diff 0.001 (2.75 ppm), SMILES S=P(OC)SCC(O)=NC, Annotation [C4H10NO2PS2-H]+, Rule of HR True", "198.96501": "Theoretical m/z 198'..b'.2905090298183795, 5], [0.0007786069695307855, 1], [0.0005160257799646218, 2], [0.00038736811468175473, 3], [0.004167449964456371, 2], [0.002043269072734401, 2], [0.0007317319771959746, 4], [1.9838321402668794e-05, 1], [8.801820482468617e-06, 1], [9.255871433471165e-05, 1], [0.00027188763979513214, 1], [4.4947603414879724e-05, 1], [0.009400379409723776, 2], [0.005755388742032419, 5], [0.0007186188027915625, 4], [0.0002820277712237843, 1], [0.0021840777388529727, 3], [0.0021623907785408305, 3], [0.0007283436921234106, 1], [0.0008441009586501761, 1], [0.0002073559764999649, 1], [0.0007416252801498897, 1], [0.00015068661266851979, 1], [8.813547345301928e-05, 1], [9.26696450737907e-05, 1], [0.001547967118838771, 1], [0.05550228874171714, 5], [0.02125475011618532, 3], [0.011654725377056363, 2], [0.048940875059941075, 1], [0.09674974767603109, 4], [0.00012831189436302386, 1], [0.01967432143668846, 1], [0.01205243016929935, 1], [0.0023135823935159366, 4], [0.005962405101607889, 3], [0.004500417483116905, 5], [0.29457761231941254, 5], [6.06340812854675e-05, 2], [0.0006948984533266634, 1], [0.0007187196610315783, 2], [0.001696224336237004, 4], [0.004199029365250834, 3], [5.80297555852376e-05, 2], [0.002157838608906188, 2], [0.0006784306910464165, 5], [2.6175190827127028e-05, 1], [5.3148579647693586e-05, 3], [0.002344175993704721, 2], [0.0007030269709451109, 2], [6.932179507489616e-05, 2], [0.009409539133589516, 4], [0.007853290243932564, 8], [0.0004251026224145782, 3], [0.004851525009980412, 3], [0.002316715000724008, 4], [0.0024536891814221423, 5], [0.0009568700243380552, 1], [1.2682782292908576e-05, 1], [0.00035372089119100686, 1], [0.00019532961465025935, 1], [0.0007449280704076819, 1], [0.0006070307887188872, 3], [0.008405179318406656, 2], [0.06059234768905473, 5], [0.04007587841437244, 7], [0.011192609898471699, 3], [0.05007411887473937, 1], [0.11083177441713818, 4], [0.00041472952800485167, 2], [0.0002443181917148464, 1], [0.018417594020478472, 1], [0.010756665957563824, 1], [0.015590543256883976, 2], [0.04566648744942041, 1], [5.931306182041711e-06, 1], [0.03353471802351476, 2], [0.0016641176911516493, 1], [0.011667998101389831, 2], [0.00013501455706461823, 2], [0.0005641822404665421, 1], [0.013565745106156183, 3], [0.0022081590773529217, 2], [2.755827193743529e-05, 1], [0.0007386957430155464, 1], [0.004966786726868895, 2], [0.0010661461087560196, 2], [0.001063292846857611, 1], [0.000606881881809848, 1], [0.002186896434193792, 1], [0.12865769017257328, 1], [0.028190890883599714, 4], [0.0005044163468766756, 1], [0.002886337967280529, 1], [0.0559715028711476, 1], [0.0002158797917780897, 1], [0.00620360880796666, 2], [0.0007355365822429641, 3], [0.0007942591768873767, 1], [0.00010209303249019821, 2], [0.00019287244985231734, 1], [0.00016324733223990588, 2], [9.721355793156336e-06, 1], [4.652016835129338e-05, 1], [0.019877538355056665, 1], [0.0021026011257930747, 1], [0.0008375181541441654, 3], [2.8019476783829662e-05, 1], [0.00126319178728046, 2], [0.033408666281750724, 1], [0.00022237422701605942, 1], [0.002265272552324613, 1], [0.000234866112352408, 1], [0.0009646911431102235, 2], [0.0001427118212822249, 1], [0.0015640758643172626, 1], [0.027845060459883365, 1], [0.0005869184307697094, 1], [0.002177606309783109, 1], [0.0011879375171201744, 2], [0.00024257932792313028, 2], [0.029015752159248828, 1], [0.012882620708157606, 1], [0.09087822963404141, 1], [0.016369110194600803, 2], [0.0009837491848604097, 1], [0.0010871277430062854, 1], [0.006379222433724256, 2], [0.07844880045683728, 1], [0.003552995338630374, 1], [0.00675497126450081, 1], [0.020492592767589624, 1], [0.0004882302811255579, 1], [0.03692178564115823, 1], [0.05107327151527259, 1], [0.0013454548424030402, 1], [0.00023077527573114648, 1], [0.0034920044529350115, 1], [0.05588674606358348, 1], [0.10402095547417871, 1], [0.004711174266112351, 1]], "dtype": [["CosineGreedy_0.1_0.0_1.0_scores", "<f8"], ["CosineGreedy_0.1_0.0_1.0_matches", "<i8"]]}\n\\ No newline at end of file\n' |