changeset 3:4cbfd3d0a4c4 draft

"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
author computational-metabolomics
date Wed, 02 Feb 2022 17:29:46 +0000
parents 856b3761277d
children 8fb51147d15e
files sirius_csifingerid.py sirius_csifingerid.xml test-data/annotation_CCMSLIB00000578155_result.tsv test-data/annotation_ML006801.tsv test-data/annotation_RP022611_result.tsv test-data/annotation_RP022611_result_all_adducts.tsv test-data/annotation_generic.tsv test-data/annotation_invalid_adduct_result.tsv test-data/bc_annotation_CCMSLIB00000578155_result.tsv test-data/bc_canopus_CCMSLIB00000578155_result.tsv test-data/canopus_CCMSLIB00000578155_result.tsv test-data/canopus_ML006801.tsv test-data/canopus_RP022611_result.tsv test-data/canopus_RP022611_result_all_adducts.tsv test-data/canopus_generic.tsv test-data/canopus_invalid_adduct_result.tsv test-data/generic.tsv
diffstat 15 files changed, 185 insertions(+), 76 deletions(-) [+]
line wrap: on
line diff
--- a/sirius_csifingerid.py	Thu Jul 02 11:01:45 2020 -0400
+++ b/sirius_csifingerid.py	Wed Feb 02 17:29:46 2022 +0000
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, print_function
-
 import argparse
 import csv
 import glob
@@ -11,11 +9,11 @@
 import uuid
 from collections import defaultdict
 
-import six
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--input_pth')
-parser.add_argument('--result_pth')
+parser.add_argument('--canopus_result_pth')
+parser.add_argument('--annotations_result_pth')
 parser.add_argument('--database')
 parser.add_argument('--profile')
 parser.add_argument('--candidates')
@@ -28,8 +26,11 @@
 parser.add_argument('--meta_select_col', default='all')
 parser.add_argument('--cores_top_level', default=1)
 parser.add_argument('--chunks', default=1)
-parser.add_argument('--minMSMSpeaks', default=1)
+parser.add_argument('--min_MSMS_peaks', default=1)
 parser.add_argument('--rank_filter', default=0)
+parser.add_argument('--confidence_filter', default=0)
+parser.add_argument('--backwards_compatible',
+                    default=False, action='store_true')
 parser.add_argument('--schema', default='msp')
 parser.add_argument('-a', '--adducts', action='append', nargs=1,
                     required=False, default=[], help='Adducts used')
@@ -124,7 +125,7 @@
 def parse_meta(meta_regex, meta_info=None):
     if meta_info is None:
         meta_info = {}
-    for k, regexes in six.iteritems(meta_regex):
+    for k, regexes in meta_regex.items():
         for reg in regexes:
             m = re.search(reg, line, re.IGNORECASE)
             if m:
@@ -203,7 +204,7 @@
         adduct = meta_info['precursor_type']
     else:
         if paramd["default_ion"]:
-            paramd["cli"]["--ion"] = paramd["default_ion"]
+            paramd["cli"]["--adduct"] = paramd["default_ion"]
             adduct = paramd["default_ion"]
         else:
             paramd["cli"]["--auto-charge"] = ''
@@ -217,14 +218,24 @@
         paramd['additional_details']['adduct'] = adduct
 
     # ============== Create CLI cmd for metfrag ===============================
-    cmd = "sirius --fingerid"
-    for k, v in six.iteritems(paramd["cli"]):
-        cmd += " {} {}".format(str(k), str(v))
+    cmd = "sirius --no-citations --ms2 {} --adduct {} --precursor {} -o {} " \
+          "formula -c {} --ppm-max {} --profile {} " \
+          "structure --database {} canopus".format(
+                       paramd["cli"]["--ms2"],
+                       adduct,
+                       paramd["cli"]["--precursor"],
+                       paramd["cli"]["--output"],
+                       paramd["cli"]["--candidates"],
+                       paramd["cli"]["--ppm-max"],
+                       paramd["cli"]["--profile"],
+                       paramd["cli"]["--database"]
+          )
+    print(cmd)
     paramds[paramd["SampleName"]] = paramd
 
     # =============== Run srius ==============================================
     # Filter before process with a minimum number of MS/MS peaks
-    if plinesread >= float(args.minMSMSpeaks):
+    if plinesread >= float(args.min_MSMS_peaks):
 
         if int(args.cores_top_level) == 1:
             os.system(cmd)
@@ -331,57 +342,85 @@
     pool.close()
     pool.join()
 
+
 ######################################################################
 # Concatenate and filter the output
 ######################################################################
 # outputs might have different headers. Need to get a list of all the headers
 # before we start merging the files outfiles = [os.path.join(wd, f) for f in
 # glob.glob(os.path.join(wd, "*_metfrag_result.csv"))]
-outfiles = glob.glob(os.path.join(wd, '*', '*', 'summary_csi_fingerid.csv'))
+def concat_output(filename, result_pth,
+                  rank_filter, confidence_filter, backwards_compatible):
+    """Merge every per-spectrum summary named `filename` into `result_pth`.
+
+    Rows whose rank exceeds `rank_filter`, or whose ConfidenceScore is below
+    `confidence_filter`, are dropped; 0 disables either filter. With
+    `backwards_compatible`, InChIkey2D and CSI:FingerIDScore are renamed to
+    inchikey2d and Score in the merged file.
+    """
+    outfiles = glob.glob(os.path.join(wd, '*', '*{}'.format(filename)))
 
-# sort files nicely
-outfiles.sort(key=lambda s: int(re.match(r'^.*/('
-                                         r'\d+).*/.*/summary_csi_fingerid.csv',
-                                         s).group(1)))
-print(outfiles)
+    # sort files nicely
+    outfiles.sort(key=lambda s: int(re.match(r'^.*/('
+                                             r'\d+).*{}'.format(filename),
+                                             s).group(1)))
+    print(outfiles)
+
+    if not outfiles:
+        print('No results')
+        sys.exit()
 
-if len(outfiles) == 0:
-    print('No results')
-    sys.exit()
+    # only the header row of the first summary is read
+    with open(outfiles[0], 'r') as infile:
+        headers = next(csv.reader(infile, delimiter='\t'))
+    headers = list(paramd['additional_details'].keys()) + headers
 
-headers = []
-c = 0
-for fn in outfiles:
-    with open(fn, 'r') as infile:
-        reader = csv.reader(infile, delimiter='\t')
-        if sys.version_info >= (3, 0):
-            headers.extend(next(reader))
-        else:
-            headers.extend(reader.next())
-        break
+    with open(result_pth, 'a') as merged_outfile:
+        dwriter = csv.DictWriter(merged_outfile,
+                                 fieldnames=headers, delimiter='\t')
+        dwriter.writeheader()
+        for fn in sorted(outfiles):
+            print(fn)
+            with open(fn) as infile:
+                reader = csv.DictReader(infile, delimiter='\t')
 
-headers = list(paramd['additional_details'].keys()) + headers
+                ad = paramds[fn.split(os.sep)[-2]]['additional_details']
+                for line in reader:
+                    if 'rank' in line and \
+                            0 < int(rank_filter) < int(line['rank']):
+                        # filter out those annotations greater than rank filter
+                        # If rank_filter is zero then skip
+                        continue
 
-with open(args.result_pth, 'a') as merged_outfile:
-    dwriter = csv.DictWriter(merged_outfile,
-                             fieldnames=headers, delimiter='\t')
-    dwriter.writeheader()
+                    score = line.get('ConfidenceScore')
+                    if score is not None and float(confidence_filter) > 0:
+                        # filter out annotations below the confidence filter;
+                        # a non-numeric score (e.g. N/A) cannot satisfy it
+                        try:
+                            if float(score) < float(confidence_filter):
+                                continue
+                        except ValueError:
+                            continue
+                    line.update(ad)
 
-    for fn in sorted(outfiles):
-        print(fn)
-
-        with open(fn) as infile:
-            reader = csv.DictReader(infile, delimiter='\t')
+                    dwriter.writerow(line)
 
-            ad = paramds[fn.split(os.sep)[-3]]['additional_details']
+    if backwards_compatible:
+        # Headers required in this format for tools that used
+        # v4.0.1 of SIRIUS-CSI:FingerID. Done in Python because
+        # "sed ... f > f" lets the shell empty f before sed reads it.
+        with open(result_pth, newline='') as infile:
+            lines = [ln.replace('InChIkey2D', 'inchikey2d')
+                     .replace('CSI:FingerIDScore', 'Score', 1)
+                     for ln in infile]
+        with open(result_pth, 'w', newline='') as outfile:
+            outfile.writelines(lines)
+
 
-            for line in reader:
-                if 0 < int(args.rank_filter) < int(line['rank']):
-                    # filter out those annotations greater than rank filter
-                    # If rank_filter is zero then skip
-                    continue
-                line.update(ad)
-                # round score to 5 d.p.
-                line['score'] = round(float(line['score']), 5)
-
-                dwriter.writerow(line)
+# The filters and the header renaming use columns (rank, ConfidenceScore,
+# InChIkey2D, CSI:FingerIDScore) of the compound identifications summary,
+# so they are applied to that file; the CANOPUS summary is merged as is.
+concat_output('canopus_summary.tsv', args.canopus_result_pth, 0, 0, False)
+concat_output('compound_identifications.tsv', args.annotations_result_pth,
+              args.rank_filter, args.confidence_filter,
+              args.backwards_compatible)
--- a/sirius_csifingerid.xml	Thu Jul 02 11:01:45 2020 -0400
+++ b/sirius_csifingerid.xml	Wed Feb 02 17:29:46 2022 +0000
@@ -1,9 +1,9 @@
 <tool id="sirius_csifingerid" name="SIRIUS-CSI:FingerID"
-      version="4.0.1+galaxy4" profile="18.01">
+      version="4.9.8+galaxy0" profile="21.01">
     <description>is used to identify metabolites using single and
         tandem mass spectrometry</description>
     <requirements>
-        <requirement type="package" version="4.0.1">
+        <requirement type="package" version="4.9.8">
             sirius-csifingerid</requirement>
     </requirements>
     <command detect_errors="exit_code">
@@ -17,10 +17,11 @@
             --ppm_max $ppm_max
             --polarity $polarity
             --out_dir .
-            --result_pth sirius_all_summary.tsv
+            --canopus_result_pth canopus_all_summary.tsv
+            --annotations_result_pth annotations_all_summary.tsv
             --cores_top_level 1
             --meta_select_col $meta_select_col
-            --minMSMSpeaks $minMSMSpeaks
+            --min_MSMS_peaks $min_MSMS_peaks
             --schema $schema
             --temp_dir .
 
@@ -47,6 +48,13 @@
             #end if
 
             --rank_filter $rank_filter
+
+            --confidence_filter $confidence_filter
+
+            #if $backwards_compatible
+                --backwards_compatible
+            #end if
+
     ]]></command>
     <inputs>
         <param name="input" argument="--input_pth" type="data" format="msp"
@@ -54,11 +62,12 @@
         <param argument="--database" type="select"
                label="Select SIRIUS-CSI:FingerID Database" >
             <option value="PubChem" >PubChem</option>
-            <option selected="true" value="hmdb">HMDB</option>
+            <option value="hmdb">HMDB</option>
             <option value="kegg">KEGG</option>
             <option value="knapsack">KNApSAcK</option>
             <option value="biocyc">BioCyc</option>
-            <option  value="all">All (see help)</option>
+            <option selected="true" value="bio">Bio (all biological)</option>
+            <option  value="all">All use all databases</option>
         </param>
         <param argument="--ppm_max" type="integer" value="10" min="0"
                label="Mass deviation of the fragment peaks in ppm" />
@@ -97,7 +106,7 @@
             <option value="all">
                 Extra metadata columns from all MSP parameters</option>
         </param>
-        <param argument="--minMSMSpeaks" type="integer" min="0" value="0"
+        <param argument="--min_MSMS_peaks" type="integer" min="0" value="0"
                label="Minimum number of MS/MS peaks"/>
 
         <conditional name="adducts_cond">
@@ -133,38 +142,53 @@
         <param argument="--rank_filter" type="integer" value="0"
                label="Only show the top ranked annotations less than or equal
                       to this value (default to show all annotations)"/>
+
+        <param argument="--confidence_filter" type="float" value="0" min="0"
+               label="Only show annotations with a confidence score greater
+                      than or equal to this value (default to show all annotations)"/>
+
+        <param argument="--backwards_compatible" type="boolean" checked="false"
+               label="Makes the outputs compatible with annotation workflows that used the old output from
+                      SIRIUS-CSI:FingerID v4.0.1"/>
     </inputs>
     <outputs>
-        <data name="results" format="tsv"
-              from_work_dir="sirius_all_summary.tsv"/>
+        <data name="canopus_results" format="tsv" label="${tool.name} on ${on_string}: CANOPUS"
+              from_work_dir="canopus_all_summary.tsv"/>
+        <data name="annotation_results" format="tsv" label="${tool.name} on ${on_string}: Annotations"
+              from_work_dir="annotations_all_summary.tsv"/>
     </outputs>
     <tests>
         <test>
             <!-- Test "massbank" style data format  -->
             <param name="input" value="ML006801.txt"  ftype="msp"/>
-            <output name="results" file="ML006801.tsv"/>
+            <output name="annotation_results" file="annotation_ML006801.tsv"/>
+            <output name="canopus_results" file="canopus_ML006801.tsv"/>
         </test>
         <test>
             <!-- Test "generic format" style data format  -->
             <param name="input" value="generic.msp" ftype="msp"/>
-            <output name="results" file="generic.tsv"/>
+            <output name="annotation_results" file="annotation_generic.tsv"/>
+            <output name="canopus_results" file="canopus_generic.tsv"/>
         </test>
         <test>
             <!-- Test for glucose (qtof) MassBank data format  -->
             <param name="input" value="RP022611.txt" ftype="msp"/>
             <param name="profile" value="qtof"/>
-            <output name="results" file="RP022611_result.tsv"/>
+            <output name="annotation_results" file="annotation_RP022611_result.tsv"/>
+            <output name="canopus_results" file="canopus_RP022611_result.tsv"/>
         </test>
         <test>
-            <!-- Test for glucose (q-exactive) GNPS, MoNA data format  -->
+            <!-- Test for glucose (q-exactive) GNPS, MoNA data format  (and test canopus)-->
             <param name="input" value="CCMSLIB00000578155.msp" ftype="msp"/>
             <param name="profile" value="orbitrap"/>
-            <output name="results" file="CCMSLIB00000578155_result.tsv"/>
+            <output name="annotation_results" file="annotation_CCMSLIB00000578155_result.tsv"/>
+            <output name="canopus_results" file="canopus_CCMSLIB00000578155_result.tsv"/>
         </test>
         <test>
             <!-- Test invalid adduct  -->
             <param name="input" value="invalid_adduct.msp" ftype="msp"/>
-            <output name="results" file="invalid_adduct_result.tsv"/>
+            <output name="annotation_results" file="annotation_invalid_adduct_result.tsv"/>
+            <output name="canopus_results" file="canopus_invalid_adduct_result.tsv"/>
         </test>
         <test>
             <!-- Test all adducts  -->
@@ -174,8 +198,16 @@
             <conditional name="adducts_cond">
                 <param name="adducts_selector" value="all"/>
             </conditional>
-            <output name="results" file="RP022611_result_all_adducts.tsv"/>
-
+            <output name="annotation_results" file="annotation_RP022611_result_all_adducts.tsv"/>
+            <output name="canopus_results" file="canopus_RP022611_result_all_adducts.tsv"/>
+        </test>
+        <test>
+            <!-- Test for backwards-compatible format -->
+            <param name="input" value="CCMSLIB00000578155.msp" ftype="msp"/>
+            <param name="profile" value="orbitrap"/>
+            <param name="backwards_compatible" value="--backwards_compatible"/>
+            <output name="annotation_results" file="bc_annotation_CCMSLIB00000578155_result.tsv"/>
+            <output name="canopus_results" file="bc_canopus_CCMSLIB00000578155_result.tsv"/>
         </test>
     </tests>
     <help>
@@ -215,7 +247,9 @@
 
 * BioCyc
 
-* All (SIRIUS will consider all m/z possible molecular formulas) 
+* Bio (default in CLI)
+
+* All
 
 **\3. Mass deviation of the fragment peaks in ppm**
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotation_CCMSLIB00000578155_result.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,2 @@
+name	adduct	rank	formulaRank	#adducts	#predictedFPs	ConfidenceScore	CSI:FingerIDScore	ZodiacScore	SiriusScore	molecularFormula	adduct	InChIkey2D	InChI	name	smiles	xlogp	pubchemids	links	dbflags	ionMass	retentionTimeInSeconds	id
+D-GLUCOSE-6-PHOSPHATE	[M-H]-	1	1	1	1	0.2777991179997831	-40.17179067053119	N/A	17.8469785346642	C6H13O9P	[M-H]-	NBSCHQHZLSJFNQ	InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(C(O1)O)O)O)O)OP(=O)(O)O	-4.19999981	208;5958;65127;439198;439284;439404;439427;440100;447096;449526;4178491;4459709;9817215;9859975;10848963;12314997;11536233;10038266;12598269;11651816;11651817;11701643;10332946;10422797;10422798;11499884;16219407;21604864;21604865;23421197;23421199;23421200;25200774;25244236;24802166;42609823;44589902;44629605;46936284;51351673;51351674;59660207;59660208;66804219;72200063;71048769;70828590;90657928;90087729;92043642;89530481;89533633;92144442;92331698;92331699;92450038;102072969;101251820;100983220;124302956;124303605;129703999;129800866;133556303;134813555;135054589;135070792	HMDB:(1078 1401 3498);SuperNatural:(SN00005991 SN00006877 SN00368182 SN00005992);ZINC bio:(ZINC01529564 ZINC01532857 ZINC03869395 ZINC03869396 ZINC03869397 ZINC03869398 ZINC03875374 ZINC03875375 ZINC04096188 ZINC08551508 ZINC13540027);additional;MeSH:(65127 25244236);Plantcyc:(CPD-15711 GLC-6-P CPD-15712 CPD-1241 D-HEXOSE-6-PHOSPHATE MANNOSE-6P ALPHA-GLC-6-P);PubMed;NORMAN:(NS00015226);COCONUT:(CNP0093800 CNP0209092);KNApSAcK:(7307);Natural Products:(UNPD119019 UNPD208877);PubChem:(208 5958 65127 439198 439284 439404 439427 440100 447096 449526 4178491 4459709 9817215 9859975 10848963 12314997 11536233 10038266 12598269 11651816 11651817 11701643 10332946 10422797 10422798 11499884 16219407 21604864 21604865 23421197 23421199 23421200 25200774 25244236 24802166 42609823 44589902 44629605 46936284 51351673 51351674 59660207 59660208 66804219 72200063 71048769 70828590 90657928 90087729 92043642 89530481 89533633 92144442 92331698 92331699 92450038 102072969 101251820 100983220 124302956 124303605 129703999 129800866 133556303 134813555 135054589 135070792);CHEBI:(49728 91004 61548 58735 58247 134068 4170 4141 136602 
58225 47944 43896 17719 48066 17665 61567 60332 61667 41076);PubChem class - bio and metabolites;KEGG:(C00275 C01172 C02965 C03735 C00668 C01113 C00092 C02962);YMDB:(2311 311 947 966 242);Training Set;Biocyc:(CPD-15711 GLC-6-P CPD-15712 CPD-1241 D-HEXOSE-6-PHOSPHATE MANNOSE-6P CPD1A0-6282 ALPHA-GLC-6-P SMA-0000014 SMA-0000409)	20965758	259.022	NaN	0_unknown_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotation_ML006801.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,2 @@
+name	adduct	rank	formulaRank	#adducts	#predictedFPs	ConfidenceScore	CSI:FingerIDScore	ZodiacScore	SiriusScore	molecularFormula	adduct	InChIkey2D	InChI	name	smiles	xlogp	pubchemids	links	dbflags	ionMass	retentionTimeInSeconds	id
+L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+	[M+H]+	1	1	1	1	0.9560298733520203	-6.599755497315031	N/A	21.01043087165301	C5H12N2O2S	[M+H]+	GHSJKUNUIHUPDF	InChI=1S/C5H12N2O2S/c6-1-2-10-3-4(7)5(8)9/h4H,1-3,6-7H2,(H,8,9)	L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+	C(CSCC(C(=O)O)N)N	-3.70000005	20049;99558;6995002;12898158;54754416;57517225	HMDB:(29178 33518);SuperNatural:(SN00250912);ZINC bio:(ZINC01705407 ZINC01731787);additional;MeSH:(20049 99558);Plantcyc:(S-2-AMINOETHYL-L-CYSTEINE);PubMed;NORMAN:(NS00014577);COCONUT:(CNP0297414);Natural Products:(UNPD166389);PubChem:(20049 99558 6995002 12898158 54754416 57517225);CHEBI:(497734);PubChem class - bio and metabolites;Training Set;Biocyc:(S-2-AMINOETHYL-L-CYSTEINE)	20899950	165.0692	NaN	0_unknown_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotation_RP022611_result.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,2 @@
+name	adduct	rank	formulaRank	#adducts	#predictedFPs	ConfidenceScore	CSI:FingerIDScore	ZodiacScore	SiriusScore	molecularFormula	adduct	InChIkey2D	InChI	name	smiles	xlogp	pubchemids	links	dbflags	ionMass	retentionTimeInSeconds	id
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	[M-H]-	1	1	1	1	0.31290839506383084	-56.37857062945502	N/A	35.11438137495146	C6H12O6	[M-H]-	WQZGKKKJIJFFOK	InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C(C1C(C(C(C(O1)O)O)O)O)O	-2.5999999	206;5793;6036;18950;1549080;64689;79025;81696;657055;185698;439353;439357;439507;439583;439680;441032;441033;441034;441035;3000450;444314;448388;448702;451187;451188;451189;452245;455147;3034742;2724488;7018164;5104362;7043897;7044038;7098663;7098664;7157007;6102790;5319264;6321330;6323336;6400264;6560213;6971003;6971007;6971016;6971096;6971097;6971098;6992021;6992084;9794056;12003287;9815418;9834129;12193653;12285853;12285856;12285861;12285862;12285863;12285866;12285870;12285871;12285873;12285877;12285878;12285879;12285885;12285886;12285889;12285890;12285891;12285892;12285893;12285894;9899007;10899282;10035228;10954241;11571906;11571917;10081060;11600783;10103794;11019447;11030410;10130220;11651921;11672764;10197954;10219674;10219763;10313382;10329946;11344362;11959770;11367383;11970126;11412863;11480819;11492034;16211884;16211941;16211984;16211986;16212959;16212960;16212966;16213546;16213640;16213872;16217112;16219580;22825318;22836365;22836366;21355827;23424086;24802149;24802163;24802281;24892722;42626680;46188479;44328781;44328785;46780441;46897877;51340651;50939543;54445181;54445182;57197748;57288387;57483528;60052896;60078648;56845432;56845995;59034276;59036328;59040622;59083882;59105109;59125088;57691826;58594768;59146659;58595959;58618581;59503411;59503407;59383280;57973135;59445439;58969552;59886072;58070804;59965103;58265153;58265160;58265166;58265178;58265190;58265196;58300638;66629908;67518639;69528681;67615000;67615455;67641738;68167579;68324677;68334110;67938791;67944215;67944290;67950444;70543261;71777654;70443535;71309028;71309128;71309129;71309140;71309397;71309503;71309513;71309514;71309671;71309852;71309905;71309908;71309927;71317094;71317095;71317096;7131
7097;71317182;75357255;76973265;86278404;88547603;88255060;87297824;89000581;90057933;90781811;87929779;87931119;88974141;92043367;92043446;90159939;90895196;89200515;89332529;90346255;89374440;91057721;89742272;89424182;90470917;90472751;90472752;90472753;90472761;90472762;90472770;90473076;89855666;101015849;101033892;101513786;102089288;101254308;101254309;101254310;101254311;101254312;101254313;101254314;101254315;101718250;101718251;101796201;101469918;102601142;102601177;102601371;102601743;102601816;102447462;102447463;117064633;117064644;117065485;117633116;117768413;118797420;118797610;118797621;118797622;118855904;118855887;118855889;118855910;118855920;118855925;117938207;118924468;122522140;121494046;121494058;122360911;126704391;133119158;133119249;133121364;129629038;131698424;131698425;131698450;131699179;131706405;132939819;132939820;133662560;133662561;133662562;131842051;134695353;131966764;134860471;139025182;137554722	HMDB:(122 516 3345 1151 62202 12326 3449 169 33704 143 61922);PubChem class - food;SuperNatural:(SN00048687 SN00050524 SN00321479 SN00064345 SN00003330 SN00396363 SN00005681 SN00330500 SN00156785 SN00156795 SN00001876 SN00048690 SN00226998 SN00064344 SN00273897 SN00048679);ZINC bio:(ZINC00895026 ZINC00895055 ZINC00895084 ZINC00895350 ZINC00896168 ZINC00896169 ZINC00897147 ZINC00901155 ZINC01529206 ZINC01530434 ZINC01532520 ZINC01532549 ZINC02545114 ZINC02597049 ZINC03581097 ZINC03830678 ZINC03830679 ZINC03833800 ZINC03860903 ZINC03861213 ZINC04095659 ZINC04095660 ZINC04097150 ZINC04097151 ZINC04097152 ZINC04097153 ZINC04262018 ZINC04529488);additional;MeSH:(5793 6036 18950 79025 439357 439507 439680 441032 441033 441035 11030410);Plantcyc:(CPD-3607 ALPHA-D-GALACTOSE X-DMAN-HEX-);PubMed;NORMAN:(NS00007629);COCONUT:(CNP0082250 CNP0210547);KNApSAcK:(1126);Natural Products:(UNPD148053 UNPD130932 UNPD191130 UNPD175204 UNPD20367 UNPD175249 UNPD72621 UNPD116684 UNPD158921 UNPD175399 UNPD119270 UNPD83717);PubChem:(206 5793 6036 18950 
1549080 64689 79025 81696 657055 185698 439353 439357 439507 439583 439680 441032 441033 441034 441035 3000450 444314 448388 448702 451187 451188 451189 452245 455147 3034742 2724488 7018164 5104362 7043897 7044038 7098663 7098664 7157007 6102790 5319264 6321330 6323336 6400264 6560213 6971003 6971007 6971016 6971096 6971097 6971098 6992021 6992084 9794056 12003287 9815418 9834129 12193653 12285853 12285856 12285861 12285862 12285863 12285866 12285870 12285871 12285873 12285877 12285878 12285879 12285885 12285886 12285889 12285890 12285891 12285892 12285893 12285894 9899007 10899282 10035228 10954241 11571906 11571917 10081060 11600783 10103794 11019447 11030410 10130220 11651921 11672764 10197954 10219674 10219763 10313382 10329946 11344362 11959770 11367383 11970126 11412863 11480819 11492034 16211884 16211941 16211984 16211986 16212959 16212960 16212966 16213546 16213640 16213872 16217112 16219580 22825318 22836365 22836366 21355827 23424086 24802149 24802163 24802281 24892722 42626680 46188479 44328781 44328785 46780441 46897877 51340651 50939543 54445181 54445182 57197748 57288387 57483528 60052896 60078648 56845432 56845995 59034276 59036328 59040622 59083882 59105109 59125088 57691826 58594768 59146659 58595959 58618581 59503411 59503407 59383280 57973135 59445439 58969552 59886072 58070804 59965103 58265153 58265160 58265166 58265178 58265190 58265196 58300638 66629908 67518639 69528681 67615000 67615455 67641738 68167579 68324677 68334110 67938791 67944215 67944290 67950444 70543261 71777654 70443535 71309028 71309128 71309129 71309140 71309397 71309503 71309513 71309514 71309671 71309852 71309905 71309908 71309927 71317094 71317095 71317096 71317097 71317182 75357255 76973265 86278404 88547603 88255060 87297824 89000581 90057933 90781811 87929779 87931119 88974141 92043367 92043446 90159939 90895196 89200515 89332529 90346255 89374440 91057721 89742272 89424182 90470917 90472751 90472752 90472753 90472761 90472762 90472770 90473076 89855666 101015849 
101033892 101513786 102089288 101254308 101254309 101254310 101254311 101254312 101254313 101254314 101254315 101718250 101718251 101796201 101469918 102601142 102601177 102601371 102601743 102601816 102447462 102447463 117064633 117064644 117065485 117633116 117768413 118797420 118797610 118797621 118797622 118855904 118855887 118855889 118855910 118855920 118855925 117938207 118924468 122522140 121494046 121494058 122360911 126704391 133119158 133119249 133121364 129629038 131698424 131698425 131698450 131699179 131706405 132939819 132939820 133662560 133662561 133662562 131842051 134695353 131966764 134860471 139025182 137554722);CHEBI:(37686 80962 37692 28729 28563 72452 68462 27667 37680 37620 28102 63421 141392 42905 27517 15903 37677 27380 28061 18269 37706 59552 37630 4191 40656 37744 15444 59573 4093 37740 28100 4139 37679 4167 88300 18398 86059 37627 37619 83029 37704 37671 37631 27857 37693 18246 4208 43104 37741 17925);PubChem class - bio and metabolites;KEGG:(C00737 C01487 C06466 C06465 C15923 C06464 C00031 C06467 C00962 C02209 C00221 C21032 C01825 C00124 C00936 C00738 C00984 C21050 C00267 C00159);PubChem class - safety and toxic;YMDB:(273 146 894 286 72 846);Training Set;PubChem class - drug;Biocyc:(SMA-0000448 CPD-15625 CPD-15622 CPD-15758 ALPHA-GLUCOSE CPD-15628 SMA-0000188 CPD-3607 CPD-18461 CPD-12601 CPD-15627 SMA-0000654 CPD185E-2 SMA-0000436 CPD-13428 CPD-15624 Alpha-D-Talose CPD-15621 CPD-2361 CPD-11613 CPD-15762 CPD-15757 CPD-15761 GLC CPD-9918 D-AMINO-ACID GALACTOSE SMA-0000420 ALPHA-D-GALACTOSE CPD-15759 CPD-5725 SMA-0000670 L-GALACTOSE CPD-11611 CPD-13559)	255846782	179.0561	NaN	0_unknown_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotation_RP022611_result_all_adducts.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,4 @@
+name	adduct	rank	formulaRank	#adducts	#predictedFPs	ConfidenceScore	CSI:FingerIDScore	ZodiacScore	SiriusScore	molecularFormula	adduct	InChIkey2D	InChI	name	smiles	xlogp	pubchemids	links	dbflags	ionMass	retentionTimeInSeconds	id
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	[M-H]-	1	1	1	1	0.31290839506383084	-56.37857062945502	N/A	35.11438137495146	C6H12O6	[M-H]-	WQZGKKKJIJFFOK	InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C(C1C(C(C(C(O1)O)O)O)O)O	-2.5999999	206;5793;6036;18950;1549080;64689;79025;81696;657055;185698;439353;439357;439507;439583;439680;441032;441033;441034;441035;3000450;444314;448388;448702;451187;451188;451189;452245;455147;3034742;2724488;7018164;5104362;7043897;7044038;7098663;7098664;7157007;6102790;5319264;6321330;6323336;6400264;6560213;6971003;6971007;6971016;6971096;6971097;6971098;6992021;6992084;9794056;12003287;9815418;9834129;12193653;12285853;12285856;12285861;12285862;12285863;12285866;12285870;12285871;12285873;12285877;12285878;12285879;12285885;12285886;12285889;12285890;12285891;12285892;12285893;12285894;9899007;10899282;10035228;10954241;11571906;11571917;10081060;11600783;10103794;11019447;11030410;10130220;11651921;11672764;10197954;10219674;10219763;10313382;10329946;11344362;11959770;11367383;11970126;11412863;11480819;11492034;16211884;16211941;16211984;16211986;16212959;16212960;16212966;16213546;16213640;16213872;16217112;16219580;22825318;22836365;22836366;21355827;23424086;24802149;24802163;24802281;24892722;42626680;46188479;44328781;44328785;46780441;46897877;51340651;50939543;54445181;54445182;57197748;57288387;57483528;60052896;60078648;56845432;56845995;59034276;59036328;59040622;59083882;59105109;59125088;57691826;58594768;59146659;58595959;58618581;59503411;59503407;59383280;57973135;59445439;58969552;59886072;58070804;59965103;58265153;58265160;58265166;58265178;58265190;58265196;58300638;66629908;67518639;69528681;67615000;67615455;67641738;68167579;68324677;68334110;67938791;67944215;67944290;67950444;70543261;71777654;70443535;71309028;71309128;71309129;71309140;71309397;71309503;71309513;71309514;71309671;71309852;71309905;71309908;71309927;71317094;71317095;71317096;7131
7097;71317182;75357255;76973265;86278404;88547603;88255060;87297824;89000581;90057933;90781811;87929779;87931119;88974141;92043367;92043446;90159939;90895196;89200515;89332529;90346255;89374440;91057721;89742272;89424182;90470917;90472751;90472752;90472753;90472761;90472762;90472770;90473076;89855666;101015849;101033892;101513786;102089288;101254308;101254309;101254310;101254311;101254312;101254313;101254314;101254315;101718250;101718251;101796201;101469918;102601142;102601177;102601371;102601743;102601816;102447462;102447463;117064633;117064644;117065485;117633116;117768413;118797420;118797610;118797621;118797622;118855904;118855887;118855889;118855910;118855920;118855925;117938207;118924468;122522140;121494046;121494058;122360911;126704391;133119158;133119249;133121364;129629038;131698424;131698425;131698450;131699179;131706405;132939819;132939820;133662560;133662561;133662562;131842051;134695353;131966764;134860471;139025182;137554722	HMDB:(122 516 3345 1151 62202 12326 3449 169 33704 143 61922);PubChem class - food;SuperNatural:(SN00048687 SN00050524 SN00321479 SN00064345 SN00003330 SN00396363 SN00005681 SN00330500 SN00156785 SN00156795 SN00001876 SN00048690 SN00226998 SN00064344 SN00273897 SN00048679);ZINC bio:(ZINC00895026 ZINC00895055 ZINC00895084 ZINC00895350 ZINC00896168 ZINC00896169 ZINC00897147 ZINC00901155 ZINC01529206 ZINC01530434 ZINC01532520 ZINC01532549 ZINC02545114 ZINC02597049 ZINC03581097 ZINC03830678 ZINC03830679 ZINC03833800 ZINC03860903 ZINC03861213 ZINC04095659 ZINC04095660 ZINC04097150 ZINC04097151 ZINC04097152 ZINC04097153 ZINC04262018 ZINC04529488);additional;MeSH:(5793 6036 18950 79025 439357 439507 439680 441032 441033 441035 11030410);Plantcyc:(CPD-3607 ALPHA-D-GALACTOSE X-DMAN-HEX-);PubMed;NORMAN:(NS00007629);COCONUT:(CNP0082250 CNP0210547);KNApSAcK:(1126);Natural Products:(UNPD148053 UNPD130932 UNPD191130 UNPD175204 UNPD20367 UNPD175249 UNPD72621 UNPD116684 UNPD158921 UNPD175399 UNPD119270 UNPD83717);PubChem:(206 5793 6036 18950 
1549080 64689 79025 81696 657055 185698 439353 439357 439507 439583 439680 441032 441033 441034 441035 3000450 444314 448388 448702 451187 451188 451189 452245 455147 3034742 2724488 7018164 5104362 7043897 7044038 7098663 7098664 7157007 6102790 5319264 6321330 6323336 6400264 6560213 6971003 6971007 6971016 6971096 6971097 6971098 6992021 6992084 9794056 12003287 9815418 9834129 12193653 12285853 12285856 12285861 12285862 12285863 12285866 12285870 12285871 12285873 12285877 12285878 12285879 12285885 12285886 12285889 12285890 12285891 12285892 12285893 12285894 9899007 10899282 10035228 10954241 11571906 11571917 10081060 11600783 10103794 11019447 11030410 10130220 11651921 11672764 10197954 10219674 10219763 10313382 10329946 11344362 11959770 11367383 11970126 11412863 11480819 11492034 16211884 16211941 16211984 16211986 16212959 16212960 16212966 16213546 16213640 16213872 16217112 16219580 22825318 22836365 22836366 21355827 23424086 24802149 24802163 24802281 24892722 42626680 46188479 44328781 44328785 46780441 46897877 51340651 50939543 54445181 54445182 57197748 57288387 57483528 60052896 60078648 56845432 56845995 59034276 59036328 59040622 59083882 59105109 59125088 57691826 58594768 59146659 58595959 58618581 59503411 59503407 59383280 57973135 59445439 58969552 59886072 58070804 59965103 58265153 58265160 58265166 58265178 58265190 58265196 58300638 66629908 67518639 69528681 67615000 67615455 67641738 68167579 68324677 68334110 67938791 67944215 67944290 67950444 70543261 71777654 70443535 71309028 71309128 71309129 71309140 71309397 71309503 71309513 71309514 71309671 71309852 71309905 71309908 71309927 71317094 71317095 71317096 71317097 71317182 75357255 76973265 86278404 88547603 88255060 87297824 89000581 90057933 90781811 87929779 87931119 88974141 92043367 92043446 90159939 90895196 89200515 89332529 90346255 89374440 91057721 89742272 89424182 90470917 90472751 90472752 90472753 90472761 90472762 90472770 90473076 89855666 101015849 
101033892 101513786 102089288 101254308 101254309 101254310 101254311 101254312 101254313 101254314 101254315 101718250 101718251 101796201 101469918 102601142 102601177 102601371 102601743 102601816 102447462 102447463 117064633 117064644 117065485 117633116 117768413 118797420 118797610 118797621 118797622 118855904 118855887 118855889 118855910 118855920 118855925 117938207 118924468 122522140 121494046 121494058 122360911 126704391 133119158 133119249 133121364 129629038 131698424 131698425 131698450 131699179 131706405 132939819 132939820 133662560 133662561 133662562 131842051 134695353 131966764 134860471 139025182 137554722);CHEBI:(37686 80962 37692 28729 28563 72452 68462 27667 37680 37620 28102 63421 141392 42905 27517 15903 37677 27380 28061 18269 37706 59552 37630 4191 40656 37744 15444 59573 4093 37740 28100 4139 37679 4167 88300 18398 86059 37627 37619 83029 37704 37671 37631 27857 37693 18246 4208 43104 37741 17925);PubChem class - bio and metabolites;KEGG:(C00737 C01487 C06466 C06465 C15923 C06464 C00031 C06467 C00962 C02209 C00221 C21032 C01825 C00124 C00936 C00738 C00984 C21050 C00267 C00159);PubChem class - safety and toxic;YMDB:(273 146 894 286 72 846);Training Set;PubChem class - drug;Biocyc:(SMA-0000448 CPD-15625 CPD-15622 CPD-15758 ALPHA-GLUCOSE CPD-15628 SMA-0000188 CPD-3607 CPD-18461 CPD-12601 CPD-15627 SMA-0000654 CPD185E-2 SMA-0000436 CPD-13428 CPD-15624 Alpha-D-Talose CPD-15621 CPD-2361 CPD-11613 CPD-15762 CPD-15757 CPD-15761 GLC CPD-9918 D-AMINO-ACID GALACTOSE SMA-0000420 ALPHA-D-GALACTOSE CPD-15759 CPD-5725 SMA-0000670 L-GALACTOSE CPD-11611 CPD-13559)	255846782	179.0561	NaN	0_unknown_
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	[M+HCOO]-	1	1	1	1	0.24088665114193236	-81.22229366045529	N/A	35.11438137495146	C5H10O4	[M+HCOO]-	WDRISBUVHBMJEF	InChI=1S/C5H10O4/c1-3(7)5(9)4(8)2-6/h2-5,7-9H,1H3	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	CC(C(C(C=O)O)O)O	-1.06286943	191114;13420529;11708064;18646774;45489773;53470544;57557847;88414315;92977673;98621168;135056775	PubChem:(191114 13420529 11708064 18646774 45489773 53470544 57557847 88414315 92977673 98621168 135056775);CHEBI:(62012);ZINC bio:(ZINC03652727 ZINC34194163 ZINC39644846);MeSH:(191114);Plantcyc:(CPD0-2167);Biocyc:(CPD0-2167);KEGG Mine	8590075942	179.0561	NaN	0_unknown_
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	[M+CH3COO]-	1	1	1	1	0.20584202493535325	-67.54430717221075	N/A	26.30175603007533	C4H8O4	[M+CH3COO]-	YTBSYETUWUMLBZ	InChI=1S/C4H8O4/c5-1-3(7)4(8)2-6/h1,3-4,6-8H,2H2	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C(C(C(C=O)O)O)O	-2.20000005	94176;101561;439665;5460672;5460674;15559153;71774579;90145916;89822422;121493653;121493654;130699532;131879647;131883807;131887274	SuperNatural:(SN00394603);ZINC bio:(ZINC01760169 ZINC01760171 ZINC18185147);MeSH:(94176 101561 5460672);Plantcyc:(THREOSE);PubMed;NORMAN:(NS00032729);COCONUT:(CNP0166180);KNApSAcK:(7412);Natural Products:(UNPD167167);PubChem:(94176 101561 439665 5460672 5460674 15559153 71774579 90145916 89822422 121493653 121493654 130699532 131879647 131883807 131887274);YMDB Mine;CHEBI:(27904 28587 21405 21288);PubChem class - bio and metabolites;EcoCyc Mine;PubChem class - safety and toxic;Training Set;Biocyc:(ERYTHROSE THREOSE);KEGG Mine	60217026678	179.0561	NaN	0_unknown_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotation_generic.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,6 @@
+name	adduct	rank	formulaRank	#adducts	#predictedFPs	ConfidenceScore	CSI:FingerIDScore	ZodiacScore	SiriusScore	molecularFormula	adduct	InChIkey2D	InChI	name	smiles	xlogp	pubchemids	links	dbflags	ionMass	retentionTimeInSeconds	id
+MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	[M+H]+	1	1	1	1	0.26604529106484975	-118.22506236976034	N/A	23.3452897589273	C4H7NO	[M+H]+	ABBZJHFBQXYTLU	InChI=1S/C4H7NO/c1-2-3-4(5)6/h2H,1,3H2,(H2,5,6)	MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	C=CCC(=O)N	-0.100000001	541483;11105303	COCONUT:(CNP0292806);Natural Products:(UNPD154526);PubChem:(541483 11105303);SuperNatural:(SN00312732);ZINC bio:(ZINC02525865);PubChem class - bio and metabolites;PubMed	19963970	86.0606307983398	NaN	0_unknown_
+MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	[M+H]+	1	1	1	1	0.20971889099889915	-122.29555245689393	N/A	21.957862557474318	C4H9N	[M+H]+	IOXXVNYDGIXMIP	InChI=1S/C4H9N/c1-3-4-5-2/h3,5H,1,4H2,2H3	MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	CNCC=C	0.400000006	69391;37888249	NORMAN:(NS00035152);PubChem class - safety and toxic;PubChem:(69391 37888249);PubMed	67371074	72.0815277099609	NaN	0_unknown_
+MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	1	1	1	1	0.8516968844490043	-30.428891568077617	N/A	19.249416937845428	C2H6OS	[M+H]+	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C	-0.600000024	679;75151;12206145;12264368;10103116;18594457;20151975;20719893;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);PubChem class - food;Maconda:(CON00016);ZINC bio:(ZINC05224188);HSDB:(67-68-5);MeSH:(679);Plantcyc:(DMSO);PubMed;NORMAN:(NS00001957);Natural Products:(UNPD148866);PubChem:(679 75151 12206145 12264368 10103116 18594457 20151975 20719893 21022526 22345572 57247813 71309204 76973052 90811807 90817578);CHEBI:(28262);PubChem class - bio and metabolites;KEGG:(C11143);PubChem class - safety and toxic;PubChem class - drug;Biocyc:(DMSO)	252096366	79.0218658447266	NaN	0_unknown_
+MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	1	1	1	1	0.8955620660537568	-41.80129625695348	N/A	14.61674883848477	C2H6OS	[M+H]+	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C	-0.600000024	679;75151;12206145;12264368;10103116;18594457;20151975;20719893;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);PubChem class - food;Maconda:(CON00016);ZINC bio:(ZINC05224188);HSDB:(67-68-5);MeSH:(679);Plantcyc:(DMSO);PubMed;NORMAN:(NS00001957);Natural Products:(UNPD148866);PubChem:(679 75151 12206145 12264368 10103116 18594457 20151975 20719893 21022526 22345572 57247813 71309204 76973052 90811807 90817578);CHEBI:(28262);PubChem class - bio and metabolites;KEGG:(C11143);PubChem class - safety and toxic;PubChem class - drug;Biocyc:(DMSO)	252096366	79.0218811035156	NaN	0_unknown_
+MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	1	1	1	1	0.8320051497664187	-29.61962034150095	N/A	27.955014068311474	C2H6OS	[M+H]+	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C	-0.600000024	679;75151;12206145;12264368;10103116;18594457;20151975;20719893;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);PubChem class - food;Maconda:(CON00016);ZINC bio:(ZINC05224188);HSDB:(67-68-5);MeSH:(679);Plantcyc:(DMSO);PubMed;NORMAN:(NS00001957);Natural Products:(UNPD148866);PubChem:(679 75151 12206145 12264368 10103116 18594457 20151975 20719893 21022526 22345572 57247813 71309204 76973052 90811807 90817578);CHEBI:(28262);PubChem class - bio and metabolites;KEGG:(C11143);PubChem class - safety and toxic;PubChem class - drug;Biocyc:(DMSO)	252096366	79.0218887329102	NaN	0_unknown_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bc_annotation_CCMSLIB00000578155_result.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,2 @@
+name	adduct	rank	formulaRank	#adducts	#predictedFPs	ConfidenceScore	CSI:FingerIDScore	ZodiacScore	SiriusScore	molecularFormula	adduct	InChIkey2D	InChI	name	smiles	xlogp	pubchemids	links	dbflags	ionMass	retentionTimeInSeconds	id
+D-GLUCOSE-6-PHOSPHATE	[M-H]-	1	1	1	1	0.2777991179997831	-40.17179067053119	N/A	17.8469785346642	C6H13O9P	[M-H]-	NBSCHQHZLSJFNQ	InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(C(O1)O)O)O)O)OP(=O)(O)O	-4.19999981	208;5958;65127;439198;439284;439404;439427;440100;447096;449526;4178491;4459709;9817215;9859975;10848963;12314997;11536233;10038266;12598269;11651816;11651817;11701643;10332946;10422797;10422798;11499884;16219407;21604864;21604865;23421197;23421199;23421200;25200774;25244236;24802166;42609823;44589902;44629605;46936284;51351673;51351674;59660207;59660208;66804219;72200063;71048769;70828590;90657928;90087729;92043642;89530481;89533633;92144442;92331698;92331699;92450038;102072969;101251820;100983220;124302956;124303605;129703999;129800866;133556303;134813555;135054589;135070792	HMDB:(1078 1401 3498);SuperNatural:(SN00005991 SN00006877 SN00368182 SN00005992);ZINC bio:(ZINC01529564 ZINC01532857 ZINC03869395 ZINC03869396 ZINC03869397 ZINC03869398 ZINC03875374 ZINC03875375 ZINC04096188 ZINC08551508 ZINC13540027);additional;MeSH:(65127 25244236);Plantcyc:(CPD-15711 GLC-6-P CPD-15712 CPD-1241 D-HEXOSE-6-PHOSPHATE MANNOSE-6P ALPHA-GLC-6-P);PubMed;NORMAN:(NS00015226);COCONUT:(CNP0093800 CNP0209092);KNApSAcK:(7307);Natural Products:(UNPD119019 UNPD208877);PubChem:(208 5958 65127 439198 439284 439404 439427 440100 447096 449526 4178491 4459709 9817215 9859975 10848963 12314997 11536233 10038266 12598269 11651816 11651817 11701643 10332946 10422797 10422798 11499884 16219407 21604864 21604865 23421197 23421199 23421200 25200774 25244236 24802166 42609823 44589902 44629605 46936284 51351673 51351674 59660207 59660208 66804219 72200063 71048769 70828590 90657928 90087729 92043642 89530481 89533633 92144442 92331698 92331699 92450038 102072969 101251820 100983220 124302956 124303605 129703999 129800866 133556303 134813555 135054589 135070792);CHEBI:(49728 91004 61548 58735 58247 134068 4170 4141 136602 
58225 47944 43896 17719 48066 17665 61567 60332 61667 41076);PubChem class - bio and metabolites;KEGG:(C00275 C01172 C02965 C03735 C00668 C01113 C00092 C02962);YMDB:(2311 311 947 966 242);Training Set;Biocyc:(CPD-15711 GLC-6-P CPD-15712 CPD-1241 D-HEXOSE-6-PHOSPHATE MANNOSE-6P CPD1A0-6282 ALPHA-GLC-6-P SMA-0000014 SMA-0000409)	20965758	259.022	NaN	0_unknown_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bc_canopus_CCMSLIB00000578155_result.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,2 @@
+name	adduct	name	molecularFormula	adduct	most specific class	level 5	subclass	class	superclass	all classifications
+D-GLUCOSE-6-PHOSPHATE	[M-H]-	D-GLUCOSE-6-PHOSPHATE	C6H13O9P	[M-H]-	Hexose phosphates	Monosaccharides	Carbohydrates and carbohydrate conjugates	Organooxygen compounds	Organic oxygen compounds	Organic compounds; Organoheterocyclic compounds; Carbohydrates and carbohydrate conjugates; Alcohols and polyols; Ethers; Organic acids and derivatives; Organooxygen compounds; Organic phosphoric acids and derivatives; Phosphate esters; Hemiacetals; Hexoses; Monosaccharides; Monosaccharide phosphates; Secondary alcohols; Oxanes; Hexose phosphates; Polyols; Alkyl phosphates; Monoalkyl phosphates; Organic oxides; Oxacyclic compounds; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/canopus_CCMSLIB00000578155_result.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,2 @@
+name	adduct	name	molecularFormula	adduct	most specific class	level 5	subclass	class	superclass	all classifications
+D-GLUCOSE-6-PHOSPHATE	[M-H]-	D-GLUCOSE-6-PHOSPHATE	C6H13O9P	[M-H]-	Hexose phosphates	Monosaccharides	Carbohydrates and carbohydrate conjugates	Organooxygen compounds	Organic oxygen compounds	Organic compounds; Organoheterocyclic compounds; Carbohydrates and carbohydrate conjugates; Alcohols and polyols; Ethers; Organic acids and derivatives; Organooxygen compounds; Organic phosphoric acids and derivatives; Phosphate esters; Hemiacetals; Hexoses; Monosaccharides; Monosaccharide phosphates; Secondary alcohols; Oxanes; Hexose phosphates; Polyols; Alkyl phosphates; Monoalkyl phosphates; Organic oxides; Oxacyclic compounds; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/canopus_ML006801.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,2 @@
+name	adduct	name	molecularFormula	adduct	most specific class	level 5	subclass	class	superclass	all classifications
+L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+	[M+H]+	L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+	C5H12N2O2S	[M+H]+	L-alpha-amino acids	Amino acids and derivatives	Amino acids, peptides, and analogues	Carboxylic acids and derivatives	Organic acids and derivatives	Organic compounds; Organosulfur compounds; Amino acids, peptides, and analogues; Alpha amino acids and derivatives; Organic acids and derivatives; Carboxylic acids and derivatives; Organonitrogen compounds; Organooxygen compounds; Amino acids and derivatives; Monoalkylamines; Monocarboxylic acids and derivatives; Thioethers; Carboxylic acids; Carbonyl compounds; Alpha amino acids; Amines; Primary amines; Sulfenyl compounds; Dialkylthioethers; Organic oxides; L-alpha-amino acids; Hydrocarbon derivatives; Amino acids; Cysteine and derivatives; Organopnictogen compounds; Organic oxygen compounds; Organic nitrogen compounds; Chemical entities
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/canopus_RP022611_result.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,2 @@
+name	adduct	name	molecularFormula	adduct	most specific class	level 5	subclass	class	superclass	all classifications
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	[M-H]-	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C6H12O6	[M-H]-	Hexoses	Monosaccharides	Carbohydrates and carbohydrate conjugates	Organooxygen compounds	Organic oxygen compounds	Organic compounds; Carbohydrates and carbohydrate conjugates; Aldehydes; Alcohols and polyols; Primary alcohols; Organooxygen compounds; Hexoses; Monosaccharides; Secondary alcohols; Carbonyl compounds; Polyols; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/canopus_RP022611_result_all_adducts.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,5 @@
+name	adduct	name	molecularFormula	adduct	most specific class	level 5	subclass	class	superclass	all classifications
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	[M-H]-	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C6H12O6	[M-H]-	Hexoses	Monosaccharides	Carbohydrates and carbohydrate conjugates	Organooxygen compounds	Organic oxygen compounds	Organic compounds; Carbohydrates and carbohydrate conjugates; Aldehydes; Alcohols and polyols; Primary alcohols; Organooxygen compounds; Hexoses; Monosaccharides; Secondary alcohols; Carbonyl compounds; Polyols; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	[M+Cl]-	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C4H10N5O	[M+Cl]-	Guanidines		Guanidines	Organonitrogen compounds	Organic nitrogen compounds	Organic compounds; Organoheterocyclic compounds; Organonitrogen compounds; Guanidines; Azacyclic compounds; Hydrocarbon derivatives; Organopnictogen compounds; Organic oxygen compounds; Organic nitrogen compounds; Chemical entities
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	[M+HCOO]-	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C5H10O4	[M+HCOO]-	Monosaccharides	Monosaccharides	Carbohydrates and carbohydrate conjugates	Organooxygen compounds	Organic oxygen compounds	Organic compounds; Aldehydes; Alcohols and polyols; Organooxygen compounds; Monosaccharides; Secondary alcohols; Carbonyl compounds; Polyols; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	[M+CH3COO]-	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C4H8O4	[M+CH3COO]-	Monosaccharides	Monosaccharides	Carbohydrates and carbohydrate conjugates	Organooxygen compounds	Organic oxygen compounds	Organic compounds; Carbohydrates and carbohydrate conjugates; Aldehydes; Alcohols and polyols; Primary alcohols; Organooxygen compounds; Monosaccharides; Secondary alcohols; Carbonyl compounds; Short-chain aldehydes; Polyols; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/canopus_generic.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -0,0 +1,6 @@
+name	adduct	name	molecularFormula	adduct	most specific class	level 5	subclass	class	superclass	all classifications
+MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	[M+H]+	MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	C4H7NO	[M+H]+	Carbonyl compounds		Carbonyl compounds	Organooxygen compounds	Organic oxygen compounds	Organic compounds; Organonitrogen compounds; Organooxygen compounds; Carbonyl compounds; Organic oxides; Hydrocarbon derivatives; Organopnictogen compounds; Organic oxygen compounds; Organic nitrogen compounds; Chemical entities
+MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	[M+H]+	MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	C4H9N	[M+H]+	Amines		Amines	Organonitrogen compounds	Organic nitrogen compounds	Organic compounds; Organonitrogen compounds; Amines; Hydrocarbon derivatives; Organopnictogen compounds; Organic nitrogen compounds; Chemical entities
+MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	C2H6OS	[M+H]+	Sulfoxides			Sulfoxides	Organosulfur compounds	Organic compounds; Organosulfur compounds; Sulfoxides; Sulfinyl compounds; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
+MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	C2H6OS	[M+H]+	Sulfonyls			Sulfonyls	Organosulfur compounds	Organic compounds; Organosulfur compounds; Sulfonyls; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
+MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	C2H6OS	[M+H]+	Sulfoxides			Sulfoxides	Organosulfur compounds	Organic compounds; Organosulfur compounds; Sulfoxides; Sulfinyl compounds; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
--- a/test-data/generic.tsv	Thu Jul 02 11:01:45 2020 -0400
+++ b/test-data/generic.tsv	Wed Feb 02 17:29:46 2022 +0000
@@ -1,7 +1,6 @@
-name	adduct	inchikey2D	inchi	molecularFormula	rank	score	name	smiles	xlogp	pubchemids	links
-MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	[M+H]+	HNJBEVLQSNELDL	InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6)	C4H7NO	1	-149.0988	MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	C1CC(=NC1)O		12025;3956071;10419134;12197590;12197592;18999930;20030003;20589568;58329813;90472990;91343693;101225382;101796586;123509162	HMDB:(2039);KNApSAcK:(38233);Natural Products:(UNPD211738);CHEBI:(36592);HSDB:(616-45-5);Plantcyc:(CPD-19607)
-MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	[M+H]+	MWFMGBPGAXYFAR	InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3	C4H7NO	2	-169.83339	MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	CC(C)(C#N)O		6406;10486800;91131204	HMDB:(60427);Natural Products:(UNPD47968);CHEBI:(15348);HSDB:(75-86-5);KEGG:(C02659);Plantcyc:(2-HYDROXY-2-METHYLPROPANENITRILE)
-MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	[M+H]+	RWRDLPDLKQPQOW	InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2	C4H9N	1	-136.14546	MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	C1CCNC1		31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985	HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)
-MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	C2H6OS	1	-75.82312	MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C		679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)
-MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	C2H6OS	1	-86.79175	MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C		679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)
-MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	C2H6OS	1	-75.67854	MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C		679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)
+name	adduct	name	molecularFormula	adduct	most specific class	level 5	subclass	class	superclass	all classifications
+MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	[M+H]+	MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	C4H7NO	[M+H]+	Carbonyl compounds		Carbonyl compounds	Organooxygen compounds	Organic oxygen compounds	Organic compounds; Organonitrogen compounds; Organooxygen compounds; Carbonyl compounds; Organic oxides; Hydrocarbon derivatives; Organopnictogen compounds; Organic oxygen compounds; Organic nitrogen compounds; Chemical entities
+MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	[M+H]+	MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	C4H9N	[M+H]+	Amines		Amines	Organonitrogen compounds	Organic nitrogen compounds	Organic compounds; Organonitrogen compounds; Amines; Hydrocarbon derivatives; Organopnictogen compounds; Organic nitrogen compounds; Chemical entities
+MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	C2H6OS	[M+H]+	Sulfoxides			Sulfoxides	Organosulfur compounds	Organic compounds; Organosulfur compounds; Sulfoxides; Sulfinyl compounds; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
+MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	C2H6OS	[M+H]+	Sulfonyls			Sulfonyls	Organosulfur compounds	Organic compounds; Organosulfur compounds; Sulfonyls; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
+MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	C2H6OS	[M+H]+	Sulfoxides			Sulfoxides	Organosulfur compounds	Organic compounds; Organosulfur compounds; Sulfoxides; Sulfinyl compounds; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities