changeset 0:9e6bf7278257 draft

"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
author computational-metabolomics
date Wed, 05 Feb 2020 10:41:48 -0500
parents
children 1db83da40c54
files sirius_csifingerid.py sirius_csifingerid.xml test-data/CCMSLIB00000578155.msp test-data/CCMSLIB00000578155_result.tsv test-data/ML006801.tsv test-data/ML006801.txt test-data/RP022611.txt test-data/RP022611_result.tsv test-data/demo_db.csv test-data/generic.msp test-data/generic.tsv test-data/historic.tsv test-data/historic_input.msp test-data/invalid_adduct.msp test-data/invalid_adduct_result.tsv test-data/sirus_csifingerid_test1.tsv
diffstat 15 files changed, 927 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sirius_csifingerid.py	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,328 @@
+from __future__ import absolute_import, print_function
+
+import argparse
+import csv
+import glob
+import multiprocessing
+import os
+import re
+import sys
+import tempfile
+import uuid
+from collections import defaultdict
+
+import six
+
+# Command-line interface. No ``type=`` is given, so values supplied on the
+# command line are strings (the numeric defaults below are ints); the code
+# further down converts them with int()/float() where it needs numbers.
+parser = argparse.ArgumentParser()
+# spectra file that is parsed record by record
+parser.add_argument('--input_pth')
+# file the merged result table is written to
+parser.add_argument('--result_pth')
+# the next four options are passed through to the sirius command line
+parser.add_argument('--database')
+parser.add_argument('--profile')
+parser.add_argument('--candidates')
+parser.add_argument('--ppm_max')
+# 'positive' / 'negative' select the fallback ion ([M+H]+ / [M-H]-)
+parser.add_argument('--polarity')
+# NOTE(review): --results_name, --out_dir and --tool_directory are accepted
+# but not read anywhere else in this script
+parser.add_argument('--results_name')
+parser.add_argument('--out_dir')
+parser.add_argument('--tool_directory')
+# parent directory for the working directory; a fresh temporary directory
+# is created when this is not given
+parser.add_argument('--temp_dir')
+
+# which extra metadata columns to add: 'name', 'name_split', 'all', or any
+# other value for just the spectrum index
+parser.add_argument('--meta_select_col', default='all')
+# 1 runs each sirius call immediately; >1 runs them in a process pool
+parser.add_argument('--cores_top_level', default=1)
+# number of commands handed to a pool worker at once
+parser.add_argument('--chunks', default=1)
+# minimum number of peaks a spectrum needs to be run (serial mode)
+parser.add_argument('--minMSMSpeaks', default=1)
+# input format: 'msp', 'massbank' or 'auto'
+parser.add_argument('--schema', default='msp')
+args = parser.parse_args()
+# echo the parsed arguments to stdout
+print(args)
+# Nothing to process: report it and stop (exit status 0) when the input
+# file is empty. sys.exit() is used because the bare exit() helper is only
+# installed by the site module.
+if os.stat(args.input_pth).st_size == 0:
+    print('Input file empty')
+    sys.exit()
+
+# Working directory that receives the per-spectrum peak files and the
+# SIRIUS output directories.
+if args.temp_dir:
+    wd = os.path.join(args.temp_dir, 'temp')
+    # Create the directory only when it is missing, so that an existing
+    # <temp_dir>/temp does not abort the run with "File exists".
+    if not os.path.exists(wd):
+        os.mkdir(wd)
+else:
+    td = tempfile.mkdtemp()
+    wd = os.path.join(td, str(uuid.uuid4()))
+    os.mkdir(wd)
+
+######################################################################
+# Setup regular expressions for MSP parsing dictionary
+######################################################################
+# Each dictionary maps a metadata field name to the list of patterns that
+# parse_meta() tries for that field on every header line.
+regex_msp = {}
+regex_msp['name'] = [r'^Name(?:=|:)(.*)$']
+regex_msp['polarity'] = [r'^ion.*mode(?:=|:)(.*)$',
+                         r'^ionization.*mode(?:=|:)(.*)$',
+                         r'^polarity(?:=|:)(.*)$']
+regex_msp['precursor_mz'] = [r'^precursor.*m/z(?:=|:)\s*(\d*[.,]?\d*)$',
+                             r'^precursor.*mz(?:=|:)\s*(\d*[.,]?\d*)$']
+regex_msp['precursor_type'] = [r'^precursor.*type(?:=|:)(.*)$',
+                               r'^adduct(?:=|:)(.*)$',
+                               r'^ADDUCTIONNAME(?:=|:)(.*)$']
+regex_msp['num_peaks'] = [r'^Num.*Peaks(?:=|:)\s*(\d*)$']
+regex_msp['msp'] = [r'^Name(?:=|:)(.*)$']  # Flag for standard MSP format
+
+regex_massbank = {}
+regex_massbank['name'] = [r'^RECORD_TITLE:(.*)$']
+regex_massbank['polarity'] = \
+    [r'^AC\$MASS_SPECTROMETRY:\s+ION_MODE\s+(.*)$']
+regex_massbank['precursor_mz'] = \
+    [r'^MS\$FOCUSED_ION:\s+PRECURSOR_M/Z\s+(\d*[.,]?\d*)$']
+regex_massbank['precursor_type'] = \
+    [r'^MS\$FOCUSED_ION:\s+PRECURSOR_TYPE\s+(.*)$']
+regex_massbank['num_peaks'] = [r'^PK\$NUM_PEAK:\s+(\d*)']
+regex_massbank['cols'] = [r'^PK\$PEAK:\s+(.*)']
+regex_massbank['massbank'] = [r'^RECORD_TITLE:(.*)$']  # Flag for massbank
+
+if args.schema == 'msp':
+    meta_regex = regex_msp
+elif args.schema == 'massbank':
+    meta_regex = regex_massbank
+elif args.schema == 'auto':
+    # If auto we just check for all the available parameter names
+    # and then determine if MassBank or MSP based on which flag entry
+    # ('massbank' or 'msp') was matched.
+    # The pattern lists are copied so that merging does not modify
+    # regex_massbank / regex_msp in place.
+    meta_regex = {k: list(v) for k, v in regex_massbank.items()}
+    for k in ('name', 'polarity', 'precursor_mz',
+              'precursor_type', 'num_peaks'):
+        meta_regex[k].extend(regex_msp[k])
+    meta_regex['msp'] = list(regex_msp['msp'])
+
+    print(meta_regex)
+else:
+    # Fail here with a clear message instead of a NameError on meta_regex
+    # once parsing starts.
+    sys.exit("Unknown schema '{}': expected 'msp', 'massbank' or "
+             "'auto'".format(args.schema))
+
+# this dictionary will store the metadata results from the MSP file
+meta_info = {}
+
+
+# extract metadata from the current input line using the regular expressions
+def parse_meta(meta_regex, meta_info=None):
+    """Update ``meta_info`` with every field matched on the current line.
+
+    The text that is searched is the module-level variable ``line`` (set by
+    the file-reading loop); it is not passed in as an argument.
+
+    :param meta_regex: dict mapping a field name to a list of patterns
+    :param meta_info: dict that is updated in place; a new dict is created
+        when it is None
+    :return: the updated ``meta_info`` dict
+    """
+    collected = {} if meta_info is None else meta_info
+    for field, patterns in six.iteritems(meta_regex):
+        for pattern in patterns:
+            found = re.search(pattern, line, re.IGNORECASE)
+            if not found:
+                continue
+            # capture groups are joined with '-'; a later matching pattern
+            # for the same field overwrites the value of an earlier one
+            collected[field] = '-'.join(found.groups()).strip()
+    return collected
+
+
+######################################################################
+# Setup parameter dictionary
+######################################################################
+def init_paramd(args):
+    """Build the initial parameter dictionary for one SIRIUS call.
+
+    :param args: object with the attributes ``database``, ``profile``,
+        ``candidates``, ``ppm_max`` and ``polarity``
+    :return: mapping with the keys ``"cli"`` (dict of command-line option
+        to value) and ``"default_ion"`` ("[M+H]+" for positive polarity,
+        "[M-H]-" for negative polarity, otherwise an empty string)
+    """
+    ion_by_polarity = {'positive': "[M+H]+", 'negative': "[M-H]-"}
+
+    paramd = defaultdict()
+    paramd["cli"] = {
+        "--database": args.database,
+        "--profile": args.profile,
+        "--candidates": args.candidates,
+        "--ppm-max": args.ppm_max,
+    }
+    paramd["default_ion"] = ion_by_polarity.get(args.polarity, '')
+
+    return paramd
+
+
+######################################################################
+# Function to run sirius when all meta and spectra is obtained
+######################################################################
+def run_sirius(meta_info, peaklist, args, wd, spectrac):
+    """Write the peak file for one spectrum and build (and maybe run) sirius.
+
+    :param meta_info: dict of metadata parsed for this spectrum
+    :param peaklist: list of (m/z, intensity) string pairs
+    :param args: parsed command-line arguments
+    :param wd: working directory for the peak file and the SIRIUS output
+    :param spectrac: spectrum index, used to name files and directories
+    :return: tuple ``(paramd, cmd)`` - the parameter dictionary and the
+        shell command string for this spectrum
+    """
+    # Get sample details (if possible to extract) e.g. if created as part of
+    # the msPurity pipeline) choose between getting additional details to
+    # add as columns as either all meta data from msp, just details from the
+    # record name (i.e. when using msPurity and we have the columns
+    # coded into the name) or just the spectra index (spectrac)
+    paramd = init_paramd(args)
+
+    if args.meta_select_col == 'name':
+        # have additional column of just the name
+        paramd['additional_details'] = {'name': meta_info['name']}
+    elif args.meta_select_col == 'name_split':
+        # have additional columns split by "|" and
+        # then on ":" e.g. MZ:100.2 | RT:20 | xcms_grp_id:1
+        details = {}
+        for sm in meta_info['name'].split("|"):
+            parts = sm.split(":")
+            if len(parts) < 2:
+                # segment without a "key:value" pair - nothing to add
+                continue
+            details[parts[0].strip()] = parts[1].strip()
+        paramd['additional_details'] = details
+    elif args.meta_select_col == 'all':
+        # have additional columns based on all
+        # the meta information extracted from the MSP
+        paramd['additional_details'] = meta_info
+    else:
+        # Just have an index of the spectra in the MSP file
+        paramd['additional_details'] = {'spectra_idx': spectrac}
+
+    sample_name = "{}_sirius_result".format(spectrac)
+    paramd["SampleName"] = sample_name
+
+    paramd["cli"]["--output"] = os.path.join(wd, sample_name)
+
+    # =============== Output peaks to txt file  ==============================
+    paramd["cli"]["--ms2"] = os.path.join(wd,
+                                          "{}_tmpspec.txt".format(spectrac))
+
+    # write spec file
+    with open(paramd["cli"]["--ms2"], 'w') as outfile:
+        for p in peaklist:
+            outfile.write(p[0] + "\t" + p[1] + "\n")
+
+    # =============== Update param based on MSP metadata ======================
+    # Replace param details with details from MSP if required
+    if 'precursor_type' in meta_info and meta_info['precursor_type']:
+        paramd["cli"]["--ion"] = meta_info['precursor_type']
+    else:
+        if paramd["default_ion"]:
+            paramd["cli"]["--ion"] = paramd["default_ion"]
+        else:
+            # flag without a value
+            paramd["cli"]["--auto-charge"] = ''
+
+    if 'precursor_mz' in meta_info and meta_info['precursor_mz']:
+        paramd["cli"]["--precursor"] = meta_info['precursor_mz']
+
+    # ============== Create CLI cmd for sirius ================================
+    # Values are shell-quoted: they partly come from the input file and
+    # even a regular adduct such as [M+H]+ is a glob pattern for the shell.
+    cmd = "sirius --fingerid"
+    for k, v in six.iteritems(paramd["cli"]):
+        cmd += " {}".format(str(k))
+        if str(v):
+            cmd += " {}".format(six.moves.shlex_quote(str(v)))
+    # The caller stores paramd under its "SampleName"; nothing is written to
+    # module-level state from here.
+
+    # =============== Run sirius =============================================
+    # Filter before process with a minimum number of MS/MS peaks
+    # NOTE(review): this filter only affects the serial path; with
+    # cores_top_level > 1 the returned cmd is run later by the caller.
+    if len(peaklist) >= float(args.minMSMSpeaks):
+
+        if int(args.cores_top_level) == 1:
+            os.system(cmd)
+
+    return paramd, cmd
+
+
+def work(cmds):
+    """Run each shell command of ``cmds`` in order; return their statuses."""
+    statuses = []
+    for command in cmds:
+        statuses.append(os.system(command))
+    return statuses
+
+
+######################################################################
+# Parse MSP file and run SIRIUS CLI
+######################################################################
+# keep list of commands if performing in CLI in parallel
+cmds = []
+# keep a dictionary of all params
+paramds = {}
+# keep count of spectra (for uid)
+spectrac = 0
+
+with open(args.input_pth, "r") as infile:
+    # number of lines for the peaks
+    pnumlines = 0
+    # number of lines read for the peaks
+    plinesread = 0
+    # NOTE: the loop variable has to be called ``line`` because parse_meta()
+    # reads it as a module-level variable.
+    for line in infile:
+
+        line = line.strip()
+
+        if pnumlines == 0:
+
+            # ============== Extract metadata from MSP ========================
+            meta_info = parse_meta(meta_regex, meta_info)
+
+            # the peak list starts once the format flag and the number of
+            # peaks (plus the PK$PEAK column line for MassBank) were seen
+            if ('massbank' in meta_info and 'cols' in meta_info) or \
+                    ('msp' in meta_info and 'num_peaks' in meta_info):
+                pnumlines = int(meta_info['num_peaks'])
+                peaklist = []
+                plinesread = 0
+
+        elif plinesread < pnumlines:
+            # =============== Extract peaks from MSP ==========================
+            # .split() will split on any empty space (i.e. tab and space)
+            fields = line.split()
+            # Keep only m/z and intensity, not relative intensity
+            peaklist.append((fields[0], fields[1]))
+            plinesread += 1
+
+        elif plinesread and plinesread == pnumlines:
+            # ======= Get sample name and additional details for output =======
+            spectrac += 1
+            paramd, cmd = run_sirius(meta_info, peaklist, args, wd, spectrac)
+
+            paramds[paramd["SampleName"]] = paramd
+            cmds.append(cmd)
+
+            meta_info = {}
+            pnumlines = 0
+            plinesread = 0
+
+            # The line that ended the peak list is usually a separator
+            # (blank line or "//"), but it can already be the first header
+            # line of the next record; parse it so that it is not lost.
+            meta_info = parse_meta(meta_regex, meta_info)
+
+    # end of file. Check if there is still a spectrum to run SIRIUS on
+    # (the file ended directly after the last peak line)
+    if plinesread and plinesread == pnumlines:
+        paramd, cmd = run_sirius(meta_info, peaklist, args, wd, spectrac + 1)
+
+        paramds[paramd["SampleName"]] = paramd
+        cmds.append(cmd)
+
+# Perform multiprocessing on command line call level: the collected
+# commands are cut into chunks and each chunk is run by a pool worker
+n_procs = int(args.cores_top_level)
+if n_procs > 1:
+    chunk_size = int(args.chunks)
+    cmds_chunks = []
+    for start in range(0, len(cmds), chunk_size):
+        cmds_chunks.append(cmds[start:start + chunk_size])
+    pool = multiprocessing.Pool(processes=n_procs)
+    pool.map(work, cmds_chunks)
+    pool.close()
+    pool.join()
+
+######################################################################
+# Concatenate and filter the output
+######################################################################
+# every SIRIUS call writes its summary two directory levels below the
+# working directory: <wd>/<N>_sirius_result/*/summary_csi_fingerid.csv
+outfiles = glob.glob(os.path.join(wd, '*', '*', 'summary_csi_fingerid.csv'))
+
+# sort files numerically on the spectrum index <N> of the result directory
+outfiles.sort(key=lambda s: int(re.match(r'^.*/('
+                                         r'\d+).*/.*/summary_csi_fingerid.csv',
+                                         s).group(1)))
+print(outfiles)
+
+if len(outfiles) == 0:
+    print('No results')
+    sys.exit()
+
+# Outputs might have different headers, so collect the columns of all files
+# (in order of first appearance) before we start merging them.
+headers = []
+for fn in outfiles:
+    with open(fn, 'r') as infile:
+        for column in next(csv.reader(infile, delimiter='\t'), []):
+            if column not in headers:
+                headers.append(column)
+
+# Additional metadata columns go first. Start with the keys of the last
+# processed spectrum and add keys that only occur for other spectra, so
+# that every row can be written. A key that is also a SIRIUS column (e.g.
+# 'name') is kept in both places, which gives two columns with that name.
+detail_cols = list(paramd['additional_details'].keys())
+for fn in outfiles:
+    for key in paramds[fn.split(os.sep)[-3]]['additional_details']:
+        if key not in detail_cols:
+            detail_cols.append(key)
+
+headers = detail_cols + headers
+
+with open(args.result_pth, 'a') as merged_outfile:
+    dwriter = csv.DictWriter(merged_outfile,
+                             fieldnames=headers, delimiter='\t')
+    dwriter.writeheader()
+
+    # keep the numeric order established above; sorting the paths again as
+    # plain strings would put spectrum 10 before spectrum 2
+    for fn in outfiles:
+        print(fn)
+
+        with open(fn) as infile:
+            reader = csv.DictReader(infile, delimiter='\t')
+
+            # the result directory name is the "SampleName" key of paramds
+            ad = paramds[fn.split(os.sep)[-3]]['additional_details']
+
+            for line in reader:
+                line.update(ad)
+                # round score to 5 d.p.
+                line['score'] = round(float(line['score']), 5)
+
+                dwriter.writerow(line)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sirius_csifingerid.xml	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,194 @@
+<tool id="sirius_csifingerid" name="SIRIUS-CSI:FingerID"
+      version="4.0.1+galaxy2">
+    <description>is used to identify metabolites using single and
+        tandem mass spectrometry</description>
+    <requirements>
+        <requirement type="package" version="4.0.1">
+            sirius-csifingerid</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+    <![CDATA[
+
+        python '$__tool_directory__/sirius_csifingerid.py'
+            --input_pth '$input'
+            --database $database
+            --profile $profile
+            --candidates $candidates
+            --ppm_max $ppm_max
+            --polarity $polarity
+            --out_dir .
+            --result_pth sirius_all_summary.tsv
+            --cores_top_level 1
+            --meta_select_col $meta_select_col
+            --minMSMSpeaks $minMSMSpeaks
+            --schema $schema
+            --temp_dir .
+
+    ]]></command>
+    <inputs>
+        <param name="input" argument="--input_pth" type="data" format="msp"
+               label="MSP file (output from Create MSP tool)" />
+        <param argument="--database" type="select"
+               label="Select SIRIUS-CSI:FingerID Database" >
+            <option value="PubChem" >PubChem</option>
+            <option selected="true" value="hmdb">HMDB</option>
+            <option value="kegg">KEGG</option>
+            <option value="knapsack">KNApSAcK</option>
+            <option value="biocyc">BioCyc</option>
+            <option  value="all">All (see help)</option>
+        </param>
+        <param argument="--ppm_max" type="integer" value="10" min="0"
+               label="Mass deviation of the fragment peaks in ppm" />
+        <param argument="--candidates" type="integer" value="5" min="1"
+               label="Maximum number of candidates in the output" />
+        <param argument="--polarity" type="select" label="Ion mode" >
+            <option value="positive" selected="true">Positive</option>
+            <option value="negative">Negative</option>
+        </param> 
+        <param argument="--profile" type="select" label="Analysis used" >
+            <option value="orbitrap" selected="true">Orbitrap</option>
+            <option value="qtof">qTOF</option>
+            <option value="fticr">FT-ICR</option>
+        </param>
+        <param argument="--schema" type="select" label="Schema"
+               help="Schema used for the MSP file (auto will try to
+                     determine the schema automatically)">
+            <option value="auto" selected="True">Auto</option>
+            <option value="msp" >Generic MSP</option>
+            <option value="massbank">MassBank</option>
+        </param>
+        <param argument="--meta_select_col" type="select"
+               label="Choose how additional metadata columns are extracted"
+               help="The SIRIUS-CSI:Fingerid output can have additional
+               metadata columns added; these can be either extracted from all
+               MSP parameters or from the 'Name' and 'RECORD_TITLE' MSP
+               parameters. Additionally, columns can be added from the 'Name'
+               or 'RECORD_TITLE' parameters by splitting on | and :  e.g.
+               'MZ:100.2 | RT:20 | xcms_grp_id:1' would create MZ,RT and
+               xcms_grp_id columns">
+            <option value="name" selected="true">
+                Extra metadata columns from the Name or RECORD_TITLE</option>
+            <option value="name_split" >
+                Extra metadata columns from the Name or
+                RECORD_TITLE (each column is split on "|" and ":" ) </option>
+            <option value="all">
+                Extra metadata columns from all MSP parameters</option>
+        </param>
+        <param argument="--minMSMSpeaks" type="integer" min="0" value="0"
+               label="Minimum number of MS/MS peaks"/>
+    </inputs>
+    <outputs>
+        <data name="results" format="tsv"
+              from_work_dir="sirius_all_summary.tsv"/>
+    </outputs>
+    <tests>
+        <test>
+            <!-- Test "massbank" style data format  -->
+            <param name="input" value="ML006801.txt"  ftype="msp"/>
+            <output name="results" file="ML006801.tsv"/>
+        </test>
+        <test>
+            <!-- Test "generic format" style data format  -->
+            <param name="input" value="generic.msp" ftype="msp"/>
+            <output name="results" file="generic.tsv"/>
+        </test>
+        <test>
+            <!-- Test for glucose (qtof) MassBank data format  -->
+            <param name="input" value="RP022611.txt" ftype="msp"/>
+            <param name="profile" value="qtof"/>
+            <output name="results" file="RP022611_result.tsv"/>
+        </test>
+        <test>
+            <!-- Test for glucose (q-exactive) GNPS, MoNA data format  -->
+            <param name="input" value="CCMSLIB00000578155.msp" ftype="msp"/>
+            <param name="profile" value="orbitrap"/>
+            <output name="results" file="CCMSLIB00000578155_result.tsv"/>
+        </test>
+        <test>
+            <!-- Test invalid adduct  -->
+            <param name="input" value="invalid_adduct.msp" ftype="msp"/>
+            <output name="results" file="invalid_adduct_result.tsv"/>
+        </test>
+    </tests>
+    <help>
+----------------
+SIRIUS-FingerID
+----------------
+
+Description
+-----------
+
+| SIRIUS is a java-based software framework for discovering a landscape of
+| de-novo identification of metabolites using single and tandem mass
+| spectrometry. SIRIUS uses isotope pattern analysis for detecting the
+| molecular formula and further analyses the fragmentation pattern of a
+| compound using fragmentation trees. Website:
+| https://bio.informatik.uni-jena.de/software/sirius/
+|
+
+Parameters
+----------
+
+**\1. MSP file**
+
+MSP file created using *Create MSP* tool
+
+**\2. Select SIRIUS-CSI:FingerID Databases**
+
+The following databases are available:
+
+* PubChem
+
+* HMDB
+
+* KEGG
+
+* KNApSAcK
+
+* BioCyc
+
+* All (SIRIUS will consider all molecular formulas that are possible for the measured m/z)
+
+**\3. Mass deviation of the fragment peaks in ppm**
+
+Allowed mass deviation of the fragment peaks.
+
+**\4. The maximum number of candidates in the output**
+
+Set the top X candidates to return.
+
+**\5. Ion mode**
+
+* Positive
+
+* Negative
+
+**\6. Analysis used**
+
+* Orbitrap
+
+* qTOF
+
+* FT-ICR
+
+If you want to analyze spectra measured with Orbitrap or FT-ICR, you should
+specify the appropriate analysis profile. A profile is a set of configuration
+options and scoring functions SIRIUS 3 will use for its analysis. For example,
+the Orbitrap and FT-ICR profiles have tighter constraints for the allowed mass
+deviation but do not rely so much on the intensity of isotope peaks.
+
+
+Developers and contributors
+---------------------------
+
+- **Jordi Capellades (j.capellades.to@gmail.com) - Universitat Rovira i Virgili (SP)**
+- **Thomas N Lawson (t.n.lawson@bham.ac.uk) - University of Birmingham (UK)**
+- **Simon Bray (sbray@informatik.uni-freiburg.de) - University of Freiburg (Germany)**
+- **Ralf Weber (r.j.weber@bham.ac.uk) - University of Birmingham (UK)**
+
+    </help>
+    <citations>
+        <citation type="doi">10.1073/pnas.1509788112</citation>
+        <citation type="doi">10.1093/bioinformatics/btu275</citation>
+    </citations>
+</tool> 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CCMSLIB00000578155.msp	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,55 @@
+Name: D-GLUCOSE-6-PHOSPHATE
+Synon: $:00in-source
+DB#: CCMSLIB00000578155
+InChIKey: NBSCHQHZLSJFNQ-UHFFFAOYSA-N
+Precursor_type: [M-H]-
+Spectrum_type: MS2
+PrecursorMZ: 259.022
+Instrument: Q-Exactive Plus
+Ion_mode: N
+Formula: C6H13O9P
+MW: 260
+ExactMass: 260.029718626
+Comments: "cas number=54010-71-8" "pubmed id=5958" "SMILES=C(C1C(C(C(C(O1)O)O)O)O)OP(=O)(O)O" "computed SMILES=O=P(O)(O)OCC1OC(O)C(O)C(O)C1O" "computed InChI=InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)" "ion source=LC-ESI" "compound source=Commercial standard" "instrument=Q-Exactive Plus" "exact mass=260.03" "charge state=0" "source file=IROA_PLATE_neg_1_H.mzXML" "ms level=MS2" "origin=GNPS-EMBL-MCF" "author=pphapale, Alexandrov Theodore, Prasad" "ionization mode=negative" "precursor m/z=259.022" "precursor type=[M-H]-" "computed mass accuracy=1.7088355429058593" "computed mass error=-4.426259999945614E-4" "SPLASH=splash10-0002-9000000000-952cb45e58693e9f65b4" "submitter=GNPS Collaboration (University of California, San Diego)"
+Num Peaks: 40
+53.041370 0.041054
+59.012539 0.064363
+71.012398 0.228006
+73.028061 0.075870
+73.063553 0.074974
+76.275803 0.045210
+78.957573 25.209277
+80.963760 0.251310
+82.959442 0.110012
+83.059830 0.080083
+96.968170 100.000000
+97.972389 0.086076
+101.023033 0.843600
+102.224525 0.053152
+111.524658 0.047110
+122.061569 0.051876
+131.034348 0.069417
+138.978973 6.805775
+143.045074 0.077522
+150.924576 0.048922
+150.978821 0.448366
+168.989807 1.792421
+175.017166 0.273820
+177.014206 0.075262
+177.033279 0.093911
+179.028229 0.060016
+189.016006 0.069395
+199.000717 2.367082
+204.990021 0.056081
+214.751755 0.053999
+214.913773 0.079768
+223.000900 0.202970
+250.817719 0.051827
+258.921234 0.240552
+258.956421 0.115132
+258.981964 0.095161
+259.022217 1.307559
+264.273438 0.053000
+276.932190 0.888159
+277.930237 0.064464
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CCMSLIB00000578155_result.tsv	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,9 @@
+name	inchikey2D	inchi	molecularFormula	rank	score	name	smiles	xlogp	pubchemids	links
+D-GLUCOSE-6-PHOSPHATE	NBSCHQHZLSJFNQ	InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)	C6H13O9P	1	-2956.17597	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(C(O)O1)O)O)O)OP(=O)(O)O		208;5958;65127;439198;439284;439404;439427;440100;447096;449526;4178491;4459709;9817215;9859975;10038266;10332946;10422797;10422798;10848963;11499884;11536233;11651816;11651817;11701643;12314997;12598269;16219407;21604864;21604865;23421197;23421199;23421200;24802166;25200774;25244236;42609823;44589902;44629605;46936284;51351673;51351674;59660207;59660208;66804219;70828590;71048769;72200063;89530481;89533633;90087729;90657928;92043642;92144442;92331699;92450038;100983220;101251820;102072969;124302956;124303605	HMDB:(3498);KNApSAcK:(7307);Natural Products:(UNPD119019 UNPD208877);CHEBI:(47944 136602 41076 4141 17665 134068 91004 61567 61667 4170 61548 58735 17719 58225 60332 58247 48066);KEGG:(C02962 C03735 C00275 C02965 C01172 C00092 C00668 C01113);Plantcyc:(MANNOSE-6P CPD-15711 CPD-15712 D-HEXOSE-6-PHOSPHATE GLC-6-P ALPHA-GLC-6-P CPD-1241);YMDB:(2311);Biocyc:(CPD-15712 CPD-1241)
+D-GLUCOSE-6-PHOSPHATE	HXXFSFRBOHSIMQ	InChI=1S/C6H13O9P/c7-1-2-3(8)4(9)5(10)6(14-2)15-16(11,12)13/h2-10H,1H2,(H2,11,12,13)	C6H13O9P	2	-2968.893	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(C(O1)OP(=O)(O)O)O)O)O)O		466;65533;122250;123912;439165;439279;439426;439995;644175;1549075;1549076;3034296;3246168;3551220;5702593;6560208;6560209;7091266;7098639;10084035;11108064;11299931;11536234;11557960;11586967;11637475;11701642;12773693;12773694;15720053;20706002;21120286;22298591;23421196;23421198;23724605;23724607;24802153;24802168;25134172;25244208;25245607;26470622;26470623;26470920;26470921;26470922;40467866;40467867;40467868;40473131;40473132;42609824;44224049;45109780;46173227;46173228;46878478;51397481;57349329;57466719;57616986;57616987;58434201;59383287;59973641;59973642;59985133;60023647;67062884;67062905;67062913;67062918;67794900;68298161;68937634;70124502;70837719;71122101;71728461;88462985;90472756;91265893;91658980;101503810;101747832;101747833;121494054;122545953;125293590;125293595;125293596;125293598	HMDB:(62705);KNApSAcK:(7389);Natural Products:(UNPD85752 UNPD57928 UNPD186485);CHEBI:(16077 17973 75522 24588 53072 58601 57684 60389 58336 60465 53025 16326 80181 58908 18205 16218 58409 57629);KEGG:(C15924 C15926 C01171 C03384 C00636 C00446 C01002 C00103 C00663);Plantcyc:(CPD-9828 GALACTOSE-1P GLC-1-P MANNOSE-1P CPDQT-4 CPD-448 CPD4FS-5);YMDB:(970);Biocyc:(CPD4FS-5)
+D-GLUCOSE-6-PHOSPHATE	BGWGXPAPYGQALX	InChI=1S/C6H13O9P/c7-2-6(10)5(9)4(8)3(15-6)1-14-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13)	C6H13O9P	3	-2996.82333	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(CO)(O)O1)O)O)OP(=O)(O)O		719;124155;439160;439396;440641;440970;444848;5083448;9543488;15648788;16760431;20843252;21604862;21604863;23421195;24802142;25201714;25245410;42609822;46174048;46878483;52916945;86308139;91746169;92024282;102322321;122174030;124300900;124350439;124524514;124579643	HMDB:(6873);KNApSAcK:(7305);Natural Products:(UNPD153056);CHEBI:(57634 4251 81499 61553 58695 16084 6307 45804 47946 58926 61527);KEGG:(C06312 C18096 C01097 C05345 C00085);Plantcyc:(TAGATOSE-6-PHOSPHATE FRUCTOSE-6P);Biocyc:(L-TAGATOSE-6-PHOSPHATE)
+D-GLUCOSE-6-PHOSPHATE	PMTUDJVZIGZBIX	InChI=1S/C6H13O9P/c7-1-3-4(9)5(10)6(2-8,14-3)15-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13)	C6H13O9P	4	-2999.57091	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(CO)(O1)OP(=O)(O)O)O)O)O		193537;5176477;6398638;15703397;16069990;21126112;21126113;57357663;99639213;124202606	HMDB:(6800);CHEBI:(27884 57267 12350);KEGG:(C03267);YMDB:(878);Biocyc:(FRUCTOSE-2-PHOSPHATE)
+D-GLUCOSE-6-PHOSPHATE	RHKKZBWRNHGJEZ	InChI=1S/C6H13O9P/c7-1-3-4(8)5(9)6(10,15-3)2-14-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13)	C6H13O9P	5	-3000.17545	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(COP(=O)(O)O)(O)O1)O)O)O		717;439394;10400369;21627880;23421194;25244216;51397484;52916944;90658050;90658051;90659357;90659358;92209483;97041850	HMDB:(1076);KNApSAcK:(19676);CHEBI:(37515 58674);KEGG:(C01094);Plantcyc:(FRU1P);Biocyc:(CPD-16154 CPD-16158 CPD-16159)
+D-GLUCOSE-6-PHOSPHATE	INAPMGSXUVUWAF	InChI=1S/C6H13O9P/c7-1-2(8)4(10)6(5(11)3(1)9)15-16(12,13)14/h1-11H,(H2,12,13,14)	C6H13O9P	6	-3061.86763	D-GLUCOSE-6-PHOSPHATE	C1(C(C(C(C(C1O)O)OP(=O)(O)O)O)O)O		9;107737;160886;161368;440043;440194;4449629;5288642;5288700;6323385;7098643;10659045;13072112;18654477;25200523;25200860;25203035;35027167;53924828;59824613;59824614;59824615;59824616;101661021;121400595;121403401	HMDB:(6814);KNApSAcK:(7483);Natural Products:(UNPD107543 UNPD92136 UNPD189294);CHEBI:(58469 18169 62383 37493 18384 58433 18297 64841 58401 64838 84142 84141);KEGG:(C03546 C06155 C01177 C04006);Plantcyc:(1-L-MYO-INOSITOL-1-P D-MYO-INOSITOL-1-MONOPHOSPHATE CPD-6701 CPD-6702 CPD-6746 CPD-9887 D-MYO-INOSITOL-4-PHOSPHATE);YMDB:(2322);Biocyc:(D-MYO-INOSITOL-4-PHOSPHATE CPD-6701 CPD-6702 CPD-6746)
+D-GLUCOSE-6-PHOSPHATE	GSXOAOHZAIYLCY	InChI=1S/C6H13O9P/c7-1-3(8)5(10)6(11)4(9)2-15-16(12,13)14/h4-7,9-11H,1-2H2,(H2,12,13,14)	C6H13O9P	7	-3108.21629	D-GLUCOSE-6-PHOSPHATE	C(C(=O)C(C(C(COP(=O)(O)O)O)O)O)O		603;69507;151197;5459902;5459952;6602428;20111689;20111690;21114947;21872891;23615358;40467872;40467873;46943428;50909805;87615581	HMDB:(124);KNApSAcK:(19683);Natural Products:(UNPD94448);CHEBI:(57579 61519 134284 15946 15845 61559 47947 134283);Plantcyc:(D-ALLULOSE-6-PHOSPHATE);YMDB:(78);Biocyc:(CPD-15828 CPD-15826 D-ALLULOSE-6-PHOSPHATE)
+D-GLUCOSE-6-PHOSPHATE	ZKLLSNQJRLJIGT	InChI=1S/C6H13O9P/c7-1-3(8)5(10)6(11)4(9)2-15-16(12,13)14/h3,5-8,10-11H,1-2H2,(H2,12,13,14)	C6H13O9P	8	-3116.86489	D-GLUCOSE-6-PHOSPHATE	C(C(C(C(C(=O)COP(=O)(O)O)O)O)O)O		481;65246;151033;439837;440076;6101730;11129032;11737049;14844438;20111955;21145035;23615304;54551858;54551860;54551861;54551863;91010818	HMDB:(60467);KNApSAcK:(19630);CHEBI:(38342 218 18105);KEGG:(C03654 C02888);YMDB:(655);Biocyc:(CPD-15970 CPD0-1116 CPD-531)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ML006801.tsv	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,2 @@
+name	inchikey2D	inchi	molecularFormula	rank	score	name	smiles	xlogp	pubchemids	links
+L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+	GHSJKUNUIHUPDF	InChI=1S/C5H12N2O2S/c6-1-2-10-3-4(7)5(8)9/h4H,1-3,6-7H2,(H,8,9)	C5H12N2O2S	1	-7.08314	L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+	C(CSCC(C(=O)O)N)N		20049;99558;6995002;12898158;25246097;54754416;57517225	HMDB:(33518);Natural Products:(UNPD166389);CHEBI:(497734);Plantcyc:(S-2-AMINOETHYL-L-CYSTEINE);Biocyc:(S-2-AMINOETHYL-L-CYSTEINE THIALYSINE)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ML006801.txt	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,63 @@
+ACCESSION: ML004801
+RECORD_TITLE: L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+
+DATE: 2014.11.12
+AUTHORS: Mark Earll, Stephan Beisken, EMBL-EBI
+LICENSE: CC BY-SA
+COPYRIGHT: Copyright (C) 2014, European Molecular Biology Laboratory - European Bioinformatics Institute (EMBL-EBI), Hinxton, UK.
+PUBLICATION: Beisken S et al (2014) Scientific Data, 1:140029, DOI:10.1038/sdata.2014.29. http://www.ebi.ac.uk/metabolights/MTBLS38
+COMMENT: CONFIDENCE standard compound
+COMMENT: ML_ID 48
+CH$NAME: L-thialysine
+CH$NAME: (2R)-2-amino-3-(2-aminoethylsulfanyl)propanoic acid
+CH$COMPOUND_CLASS: N/A; Environmental Standard
+CH$FORMULA: C5H12N2O2S
+CH$EXACT_MASS: 164.0619
+CH$SMILES: NCCSC[C@H](N)C(=O)O
+CH$IUPAC: InChI=1S/C5H12N2O2S/c6-1-2-10-3-4(7)5(8)9/h4H,1-3,6-7H2,(H,8,9)/t4-/m0/s1
+CH$LINK: CHEBI 497734
+CH$LINK: PUBCHEM CID:99558
+CH$LINK: INCHIKEY GHSJKUNUIHUPDF-BYPYZUCNSA-N
+CH$LINK: CHEMSPIDER 89945
+AC$INSTRUMENT: LTQ Orbitrap Velos Thermo Scientific
+AC$INSTRUMENT_TYPE: LC-ESI-ITFT
+AC$MASS_SPECTROMETRY: MS_TYPE MS2
+AC$MASS_SPECTROMETRY: ION_MODE POSITIVE
+AC$MASS_SPECTROMETRY: IONIZATION ESI
+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE HCD
+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 50 % (nominal)
+AC$MASS_SPECTROMETRY: RESOLUTION 7500
+AC$CHROMATOGRAPHY: COLUMN_NAME HSS T3 1.7 um, 2x150 mm, Waters
+AC$CHROMATOGRAPHY: FLOW_GRADIENT 100/0 at 0 min, 90/10 at 7.5 min, 0/100 at 10 min, 0/100 at 12 min, 100/0 at 18 min, 100/0 at 25 min
+AC$CHROMATOGRAPHY: FLOW_RATE 250 uL/min at 0 min, 400 uL/min at 7.5 min
+AC$CHROMATOGRAPHY: RETENTION_TIME 1.2 min
+AC$CHROMATOGRAPHY: SOLVENT A 0.2% Formic Acid
+AC$CHROMATOGRAPHY: SOLVENT B 98/2/0.2 Acetonitrile/Water/Formic Acid
+MS$FOCUSED_ION: BASE_PEAK 165.069
+MS$FOCUSED_ION: PRECURSOR_M/Z 165.0692
+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+
+MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1
+MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included
+MS$DATA_PROCESSING: WHOLE RMassBank 1.7.0
+PK$SPLASH: splash10-00di-0900000000-99e0ec9e6034dff32dc8
+PK$ANNOTATION: m/z tentative_formula formula_count mass error(ppm)
+  76.0215 C2H6NS+ 1 76.0215 -1.27
+  88.0392 C3H6NO2+ 1 88.0393 -1.19
+  92.0162 C2H6NOS+ 1 92.0165 -2.73
+  102.037 C4H8NS+ 1 102.0372 -1.93
+  109.0271 C4H3N3O+ 1 109.0271 0.61
+  120.0112 C3H6NO2S+ 1 120.0114 -1.8
+  148.0424 C5H10NO2S+ 1 148.0427 -1.8
+  165.0699 C5H13N2O2S+ 1 165.0692 4.09
+  174.0753 C5H10N4O3+ 1 174.0747 3.5
+PK$NUM_PEAK: 9
+PK$PEAK: m/z int. rel.int.
+  76.0215 18351.9 16
+  88.0392 41980.6 36
+  92.0162 9969.8 8
+  102.037 24583.1 21
+  109.0271 1331.3 1
+  120.0112 1140642.2 999
+  148.0424 40689.7 35
+  165.0699 12929.9 11
+  174.0753 1548.6 1
+//
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/RP022611.txt	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,48 @@
+ACCESSION: RP022611
+RECORD_TITLE: D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-
+DATE: 2017.11.29
+AUTHORS: BGC, Helmholtz Zentrum Muenchen
+LICENSE: CC BY
+COPYRIGHT: Copyright (C) 2017
+COMMENT: CONFIDENCE standard compound
+COMMENT: INTERNAL_ID 226
+CH$NAME: D-Glucose
+CH$NAME: (3R,4S,5S,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol
+CH$COMPOUND_CLASS: N/A; Metabolomics Standard
+CH$FORMULA: C6H12O6
+CH$EXACT_MASS: 180.0634
+CH$SMILES: OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O
+CH$IUPAC: InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1
+CH$LINK: CAS 50-99-7
+CH$LINK: CHEBI 4167
+CH$LINK: KEGG C00031
+CH$LINK: PUBCHEM CID:5793
+CH$LINK: INCHIKEY WQZGKKKJIJFFOK-GASJEMHNSA-N
+CH$LINK: CHEMSPIDER 5589
+AC$INSTRUMENT: maXis plus UHR-ToF-MS, Bruker Daltonics
+AC$INSTRUMENT_TYPE: LC-ESI-QTOF
+AC$MASS_SPECTROMETRY: MS_TYPE MS2
+AC$MASS_SPECTROMETRY: ION_MODE NEGATIVE
+AC$MASS_SPECTROMETRY: IONIZATION ESI
+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE CID
+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 10
+AC$CHROMATOGRAPHY: COLUMN_NAME BEH C18 1.7um, 2.1x100mm, Waters
+AC$CHROMATOGRAPHY: FLOW_GRADIENT 95/5 at 0 min, 95/5 at 1.12 min, 0.5/99.5 at 6.41 min, 0.5/99.5 at 10.01 min
+AC$CHROMATOGRAPHY: FLOW_RATE 400 uL/min
+AC$CHROMATOGRAPHY: RETENTION_TIME 0.604 min
+AC$CHROMATOGRAPHY: SOLVENT A Water with 0.1% formic acid
+AC$CHROMATOGRAPHY: SOLVENT B ACN with 0.1% formic acid
+MS$FOCUSED_ION: BASE_PEAK 179.0572
+MS$FOCUSED_ION: PRECURSOR_M/Z 179.0561
+MS$FOCUSED_ION: PRECURSOR_TYPE [M-H]-
+MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included
+MS$DATA_PROCESSING: WHOLE RMassBank 2.4.0
+PK$SPLASH: splash10-059i-9000000000-fd62712fc14434a3aa53
+PK$NUM_PEAK: 5
+PK$PEAK: m/z int. rel.int.
+  59.0138 278 715
+  71.014 264 679
+  72.9928 30 77
+  89.0251 388 999
+  101.0234 40 102
+//
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/RP022611_result.tsv	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,9 @@
+name	inchikey2D	inchi	molecularFormula	rank	score	name	smiles	xlogp	pubchemids	links
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	WQZGKKKJIJFFOK	InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2	C6H12O6	1	-2990.5565	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C(C1C(C(C(C(O)O1)O)O)O)O		206;5793;6036;18950;64689;79025;81696;185698;439353;439357;439507;439583;439680;441035;441032;441033;441034;444314;448388;448702;451187;451188;451189;452245;455147;657055;1549080;2724488;3000450;3034742;5104362;5319264;6102790;6321330;6323336;6400264;6560213;6971003;6971007;6971016;6971096;6971097;6971098;6992021;6992084;7018164;7043897;7044038;7098663;7098664;7157007;9794056;9815418;9834129;9899007;10035228;10081060;10103794;10130220;10197954;10219674;10219763;10313382;10329946;10899282;10954241;11019447;11030410;11344362;11367383;11412863;11480819;11492034;11571906;11571917;11600783;11651921;11672764;11959770;11970126;12003287;12193653;12285853;12285856;12285861;12285862;12285863;12285866;12285868;12285869;12285870;12285871;12285873;12285877;12285878;12285879;12285885;12285886;12285889;12285890;12285891;12285892;12285893;12285894;16211884;16211941;16211984;16211986;16212959;16212960;16212966;16213546;16213640;16213872;16217112;16219580;21355827;22825318;22836365;22836366;23424086;24802149;24802163;24802281;24892722;42626680;44328781;44328785;46188479;46780441;46897877;50939543;51340651;54445181;54445182;56845432;56845995;57197748;57288387;57483528;57691826;57973135;58070804;58265153;58265160;58265166;58265178;58265190;58265196;58300638;58595959;58594768;58618581;58969552;59034276;59036328;59040622;59083882;59105109;59125088;59146659;59383280;59445439;59503407;59503411;59886072;59965103;60052896;60078648;66629908;67518639;67615000;67615455;67641738;67938791;67944215;67944290;67950444;68167579;68324677;68334110;69528681;70443535;70543261;71309028;71309128;71309129;71309140;71309397;71309503;71309513;71309514;71309671;71309852;71309905;71309908;71309927;71317094;71317095;71317096;71317097;71317182;71777654;75357255;76973265;86278404;87297824;87929779;879
31119;88255060;88547603;88974141;89000581;89200515;89332529;89374440;89424182;89742272;89855666;90057933;90159939;90346255;90470917;90472751;90472752;90472753;90472761;90472762;90472770;90473076;90781811;90895196;91057721;92043367;92043446;101015849;101033892;101254308;101254309;101254310;101254311;101254312;101254313;101254314;101254315;101469918;101513786;101718250;101718251;101796201;102089288;102447462;102447463;102601142;102601177;102601371;102601743;102601816;117064633;117064644;117065485;117633116;117768413;117938207;118797420;118797610;118797621;118797622;118855887;118855889;118855904;118855910;118855920;118855925;118924468;121494058;121494046;122360911;122522140;125280077;125280078;125280079;125280080;125309563;125309564;125353406;125363512;125363513;125363514;125363515;126704391	HMDB:(62202);KNApSAcK:(1126);Natural Products:(UNPD148053 UNPD72621 UNPD116684 UNPD119270 UNPD130932 UNPD158921 UNPD83717 UNPD175249 UNPD175204 UNPD191130 UNPD20367 UNPD175399);CHEBI:(37692 37671 37693 63421 37630 27667 72452 4191 4093 37619 15903 80962 37631 17925 37677 15444 37679 27380 4208 18246 16362 28729 37680 18269 4167 37740 59573 59552 27517 28100 37706 83029 28563 28102 37620 37686 37741 86059 4139 37744 68462 37627 28061 37704);KEGG:(C21032 C00221 C00031 C21050 C02209 C01825 C15923 C00936 C00738 C00737 C06465 C06466 C06464 C00984 C00962 C00159 C06467 C01487 C00267 C00124);Plantcyc:(ALPHA-GLUCOSE L-GALACTOSE ALPHA-D-GALACTOSE CPD-12601 CPD-13559 GALACTOSE CPD-15761 CPD-3607 CPD-15762 510-methenyl-thf GLC);YMDB:(894);Biocyc:(CPD-11613 CPD-13428 CPD-11611 CPD-13559 CPD-12844 CPD-15758 CPD-15627 CPD-15759 Alpha-D-Talose CPD-15628 L-SORBOSONE CPD-18461 CPD-15622 CPD-15624 CPD-15625 CPD-15757 CPD-15761 CPD-3607 CPD-15762 CPD-15621)
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	RFSUNEUAIZKAJO	InChI=1S/C6H12O6/c7-1-3-4(9)5(10)6(11,2-8)12-3/h3-5,7-11H,1-2H2	C6H12O6	2	-2992.81068	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C(C1C(C(C(CO)(O)O1)O)O)O		716;439163;439553;439709;11008518;11105942;11378852;11769129;12306006;12306007;12306010;12306011;12306012;12306013;12306014;12306016;15942891;21581131;24755524;24755531;50990586;58798223;59105060;59642118;59748470;60078501;60109622;66809988;68009591;68015592;69261724;69261935;69261937;71310006;71310036;71529761;89810242;89810768;90159920;90346952;90347094;102193695;117935612	HMDB:(660);KNApSAcK:(1117);Natural Products:(UNPD19574 UNPD185250 UNPD163774 UNPD109385);CHEBI:(48648 48647 48646 28645 37727 49090 48673 37720 37721 48672 49089 49088 48670 37725 29084);KEGG:(C02336 C00095 C01719);Plantcyc:(BETA-D-FRUCTOSE);Biocyc:(CPD-10723 CPD-10729 CPD-15988 CPD-15989 CPD-10730)
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	LKDRXBCSQODPBY	InChI=1S/C6H12O6/c7-2-6(11)5(10)4(9)3(8)1-12-6/h3-5,7-11H,1-2H2	C6H12O6	3	-3006.71254	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C1C(C(C(C(CO)(O)O1)O)O)O		3426;24310;439192;439304;439312;440545;441036;441484;2723872;2724552;5317407;6432703;6915737;6971020;6971021;6971099;6992107;10130221;10130228;10154314;10176372;11355843;14408225;15559359;16212688;16213406;16213544;16213545;22814148;24802515;45039313;51340644;51340682;52916942;52916943;57745769;59575442;59875236;71308848;71309127;71309810;71309883;71751872;71752285;89015893;89174364;89333506;89345843;89360325;89360759;89363316;89810855;90472720;90472721;90472746;91329420;91654167;101763542;101763543;102602138;118797422;118797598;118855901;118855902;118855927;119077570;121494037;121494038;121494039;121494041;121494042;125300503;125300504;125300505;125300506;125322958;125322959;125322960;125356688;129275707	HMDB:(1266);Natural Products:(UNPD1409 UNPD51200 UNPD43618 UNPD196486 UNPD14140 UNPD69968);CHEBI:(48645 48678 48677 10295 37728 37729 49092 37719 49091 48674 4249 37714 37715);HSDB:(7660-25-5);KEGG:(C08356 C05003 C00764 C00247 C06468 C00795);Plantcyc:(CPD-10726);YMDB:(204);Biocyc:(CPD-15986 CPD-10728 CPD-15987 CPD-10726 CPD-10727 CPD-10725)
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	BJHIKXHVCXFQLS	InChI=1S/C6H12O6/c7-1-3(9)5(11)6(12)4(10)2-8/h3,5-9,11-12H,1-2H2	C6H12O6	4	-3015.06916	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C(C(C(C(C(=O)CO)O)O)O)O		1101;5984;6904;90008;92092;107428;5460024;10965117;11458041;11961810;15559364;15559365;15559366;15559367;15975980;56846514;71310259;87203108;87883498;88364517;89357936;90194848;90471261;100938761;100938762;100938763;100938764;100938765;101274261;102026061;102525471;126737088	HMDB:(62538);KNApSAcK:(33848);Natural Products:(UNPD157348 UNPD11673 UNPD28362 UNPD42482);CHEBI:(13172 13022 134275 47693 27605 37724 27922);HSDB:(87-79-6);KEGG:(C21523 C10906 C01452);Plantcyc:(CPD-9570 CPD-15616 PSICOSE CPD-15382 TAGATOSE);YMDB:(657);Biocyc:(CPD-15825 PSICOSE)
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	GZCGUPFRVQAUEE	InChI=1S/C6H12O6/c7-1-3(9)5(11)6(12)4(10)2-8/h1,3-6,8-12H,2H2	C6H12O6	5	-3017.53465	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C(=O)C(C(C(C(CO)O)O)O)O		24749;64731;80127;82308;84996;90173;94780;99459;102190;102288;107526;111112;111123;111317;134512;161658;165139;165171;165863;166991;167792;168037;169509;187891;3037556;3086538;5460248;6451569;10910141;10954115;11229130;11355844;11745248;11805319;12305796;12305797;12305799;12305800;12305811;15977259;16057040;21183545;45109778;53462839;53462852;53462878;56846079;56846519;56846584;57449163;57557846;58654615;58654624;60078498;60101813;66509130;71309394;71309492;71309493;71310055;71310073;71434190;71777455;76973373;87109007;87228435;87228929;87229000;87355288;88034483;88353328;89242343;89317890;89327884;89327885;89472723;89623639;90132269;90273086;90472355;90472363;92023398;92043770;92044000;100917967;101117002;101117003;101129024;101129025;101129026;101129027;101248541;101261456;101265967;101446815;101719777;101728293;101728294;102505103;102601198;102601265;102601267;102601589;102601778;102602086;119077284;119078796;126664755	HMDB:(62473);Natural Products:(UNPD142849 UNPD3363 UNPD7578 UNPD95755 UNPD35192 UNPD45514);CHEBI:(37681 37695 28385 37617 28014 37675 33917 86058 37701 37746 17118 37626 68461);HSDB:(50-99-7);KEGG:(C01582);Plantcyc:(CPD-15373 CPD-15374 CPD-15590);YMDB:(789);Biocyc:(CPD-7409 CPD-15626 CPD-7408 CPD-9728 UDP-GLACTOSE CPD1G-120 CPD-15629 CPD1G-2 CPD-9327 4-AMINO-BUTYRALDEHYDE CPD-7420 CPD-15590 CPD1F-130 CHOLESTEROL_ESTER CPD-15756 4-TOLUENESULFONATE CPD1F-98 DEMETHYLMENAQUINONE UBIQUINONE-9 CPD-7419 ACETONE CPD1F-129 PARATHION CPD-15383 IRON-CHELATE CPD-15760 CPD-15620 VITAMIN_K_2)
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	CDAISMWEOUEBRE	InChI=1S/C6H12O6/c7-1-2(8)4(10)6(12)5(11)3(1)9/h1-12H	C6H12O6	6	-3046.13844	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C1(C(C(C(C(C1O)O)O)O)O)O		892;11973225;12302985;53714837;68591801;90768658;91019724;100996307;100996308	HMDB:(34220);KNApSAcK:(1164);Natural Products:(UNPD40912 UNPD103126 UNPD106247 UNPD16776 UNPD54610 UNPD50920 UNPD136396 UNPD185125 UNPD191761);CHEBI:(27374 17268 27372 24848 25492 23927 27987 23311 10642 22357);KEGG:(C19891 C06153 C00137 C06151 C06152);Plantcyc:(MYO-INOSITOL CPD-8052 CPD-8059 CPD-8050);YMDB:(173);Biocyc:(CPD-8055 CPD-8054 CPD-8053 CPD-8059 CPD-8061 CPD-8060)
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	KEQUNHIAUQQPAC	InChI=1S/C6H12O6/c7-1-5(9)3-12-6(10,2-8)4-11-5/h7-10H,1-4H2	C6H12O6	7	-3051.24166	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C(C1(COC(CO)(CO1)O)O)O		2723627;4180364;13560352;40503129;124202832	HMDB:(32222)
+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	YGMNHEPVTNXLLS	InChI=1S/C6H12O6/c7-2-5(10)3(8)1-4(9)6(11)12/h3-5,7-10H,1-2H2,(H,11,12)	C6H12O6	8	-3052.67808	D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-	C(C(C(CO)O)O)C(C(=O)O)O		10350;152990;5289313;14122626;15560246;21596764;21596765;21596766;21596767;21596768;21596769;21596770;58966097;88049798;89007240;89391706;101963537;101963539	HMDB:(346)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_db.csv	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,8 @@
+"Identifier","MonoisotopicMass","MolecularFormula","SMILES","InChI","InChIKey1","InChIKey2","InChIKey3","Name","InChIKey"
+"HMDB0000123",75.03202841,"C2H5NO2","NCC(O)=O","InChI=1S/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5)","DHMQDGOQFOQNFH","UHFFFAOYSA","N","Glycine","DHMQDGOQFOQNFH-UHFFFAOYSA-N"
+"HMDB0002151",78.0139355,"C2H6OS","CS(C)=O","InChI=1S/C2H6OS/c1-4(2)3/h1-2H3","IAZDPXIOMUYVGZ","UHFFFAOYSA","N","Dimethyl sulfoxide","IAZDPXIOMUYVGZ-UHFFFAOYSA-N"
+"HMDB0031239",75.03202841,"C2H5NO2","CCON=O","InChI=1S/C2H5NO2/c1-2-5-3-4/h2H2,1H3","QQZWEECEMNQSTG","UHFFFAOYSA","N","Ethyl nitrite","QQZWEECEMNQSTG-UHFFFAOYSA-N"
+"HMDB0014691",75.03202841,"C2H5NO2","CC(=O)NO","InChI=1S/C2H5NO2/c1-2(4)3-5/h5H,1H3,(H,3,4)","RRUDCFGSUDOHDG","UHFFFAOYSA","N","Acetohydroxamic Acid","RRUDCFGSUDOHDG-UHFFFAOYSA-N"
+"HMDB0002039",85.05276385,"C4H7NO","O=C1CCCN1","InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6)","HNJBEVLQSNELDL","UHFFFAOYSA","N","2-Pyrrolidinone","HNJBEVLQSNELDL-UHFFFAOYSA-N"
+"HMDB0060427",85.05276385,"C4H7NO","CC(C)(O)C#N","InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3","MWFMGBPGAXYFAR","UHFFFAOYSA","N","Acetone cyanohydrin","MWFMGBPGAXYFAR-UHFFFAOYSA-N"
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/generic.msp	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,123 @@
+NAME:  MZ:70.0659 | RT:1483 | XCMS_group:1 | file:1 | scan:NA | pid:NA
+PRECURSORMZ: 70.0658950805664
+Comment:
+Num Peaks: 8
+50.4781379699707	3487.4296875	4.61
+51.0193099975586	3390.96948242188	4.49
+53.0031509399414	10011.958984375	13.25
+53.5898513793945	4252.7880859375	5.63
+54.3787727355957	3541.5107421875	4.69
+69.0455169677734	9650.0107421875	12.77
+70.0660934448242	37168.609375	49.18
+82.9910659790039	4077.36694335938	5.39
+
+NAME:  MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA
+PRECURSORMZ: 72.0815277099609
+COMMENT:
+Num Peaks: 6
+51.773567199707	818.313903808594	10.98
+54.0346794128418	1247.91137695312	16.75
+54.6847991943359	967.616882324219	12.98
+56.050350189209	1780.01037597656	23.90
+58.4994125366211	975.196228027344	13.09
+72.0814056396484	1660.50390625	22.29
+
+NAME:  MZ:72.0815 | RT:1857 | scan:NA
+PRECURSORMZ: 72.08154296875
+COMMENT:
+Num Peaks: 4
+56.0504341125488	1838.78173828125	46.54
+59.9103507995605	701.556762695312	17.75
+63.7723731994629	650.224975585938	16.46
+72.0814590454102	760.228637695312	19.25
+
+NAME:  MZ:76.0400 | RT:1606 | XCMS_group:5 | file:1 | scan:NA
+PRECURSORMZ: 76.0400390625
+COMMENT:
+Num Peaks: 4
+53.2376174926758	3224.35571289062	25.41
+60.3291244506836	3193.19482421875	25.17
+73.7529830932617	3305.61401367188	26.05
+82.5309600830078	2965.41772460938	23.37
+
+NAME:  MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA
+PRECURSORMZ: 79.0218658447266
+COMMENT:
+Num Peaks: 7
+53.6282501220703	15316.7431640625	1.07
+59.967342376709	251727.734375	17.51
+61.0115814208984	80113.8046875	5.57
+62.9908714294434	93065.1015625	6.47
+63.9986305236816	950876.9375	66.13
+79.0219345092773	33032.984375	2.30
+95.4936447143555	13826.033203125	0.96
+
+NAME:  MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA
+PRECURSORMZ: 79.0218811035156
+COMMENT:
+Num Peaks: 5
+59.1125831604004	67799.1953125	3.10
+59.9673652648926	345613.1875	15.83
+62.9906845092773	117693.296875	5.39
+63.9986686706543	1585970.25	72.62
+80.5974655151367	66719.4609375	3.06
+
+NAME:  MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA
+PRECURSORMZ: 79.0218887329102
+COMMENT:
+Num Peaks: 12
+53.1700401306152	2441.47143554688	2.54
+55.1893730163574	2006.07958984375	2.08
+58.9013671875	2539.39086914062	2.64
+59.9673500061035	13423.1376953125	13.94
+61.0115776062012	4831.0986328125	5.02
+62.9908828735352	3668.52905273438	3.81
+63.9986190795898	54386.6640625	56.50
+73.8388671875	2330.30126953125	2.42
+78.5768051147461	2563.25	2.66
+79.0221328735352	2581.44604492188	2.68
+96.8009872436523	2530.70141601562	2.63
+99.6652908325195	2961.3095703125	3.08
+
+NAME:  MZ:79.9904 | RT:1284 | XCMS_group:11 | file:1 | scan:NA
+PRECURSORMZ: 79.9903564453125
+COMMENT:
+Num Peaks: 3
+51.6917915344238	584.212829589844	31.93
+53.0398750305176	649.807922363281	35.48
+97.3154754638672	596.341003417969	32.59
+
+NAME:  MZ:86.0607 | RT:1497 | XCMS_group:19 | file:1 | scan:NA
+PRECURSORMZ: 86.060661315918
+COMMENT:
+Num Peaks: 4
+53.0031318664551	9658.7939453125	60.81
+53.1939277648926	1998.81518554688	12.58
+80.3447494506836	2044.23645019531	12.87
+101.307479858398	2181.85522460938	13.73
+
+NAME:  MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA
+PRECURSORMZ: 86.0606307983398
+COMMENT:
+Num Peaks: 11
+52.6782836914062	1061.12646484375	3.59
+53.0032196044922	15176.8583984375	51.38
+53.1121788024902	1193.6044921875	4.039
+53.9984169006348	2790.28930664062	9.45
+54.0287094116211	999.250427246094	3.38
+56.7024726867676	1171.42797851562	3.96
+69.0346069335938	1878.03894042969	3.36
+72.9083633422852	1256.455078125	4.25
+74.0740356445312	1324.07055664062	4.48
+80.5324630737305	1329.61022949219	4.50
+91.0167770385742	1362.0029296875	4.61
+
+NAME:  MZ:86.0607 | RT:1500 | XCMS_group:19 | file:1 | scan:NA
+PRECURSORMZ: 86.0606536865234
+COMMENT:
+Num Peaks: 5
+53.0031509399414	29580.330078125	61.35
+55.3490409851074	4989.64990234375	10.35
+61.990592956543	4089.9619140625	8.48
+63.2290992736816	4168.97412109375	8.64
+67.6647109985352	5392.48779296875	11.18
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/generic.tsv	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,7 @@
+name	inchikey2D	inchi	molecularFormula	rank	score	name	smiles	xlogp	pubchemids	links
+MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	HNJBEVLQSNELDL	InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6)	C4H7NO	1	-149.0988	MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	C1CC(=NC1)O		12025;3956071;10419134;12197590;12197592;18999930;20030003;20589568;58329813;90472990;91343693;101225382;101796586;123509162	HMDB:(2039);KNApSAcK:(38233);Natural Products:(UNPD211738);CHEBI:(36592);HSDB:(616-45-5);Plantcyc:(CPD-19607)
+MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	MWFMGBPGAXYFAR	InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3	C4H7NO	2	-169.83339	MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	CC(C)(C#N)O		6406;10486800;91131204	HMDB:(60427);Natural Products:(UNPD47968);CHEBI:(15348);HSDB:(75-86-5);KEGG:(C02659);Plantcyc:(2-HYDROXY-2-METHYLPROPANENITRILE)
+MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	RWRDLPDLKQPQOW	InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2	C4H9N	1	-136.14546	MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	C1CCNC1		31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985	HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)
+MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	C2H6OS	1	-75.82312	MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C		679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)
+MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	C2H6OS	1	-86.79175	MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C		679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)
+MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	C2H6OS	1	-75.67854	MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C		679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/historic.tsv	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,3 @@
+"name"	"source"	"experimentName"	"confidence"	"inchikey2D"	"inchi"	"molecularFormula"	"rank"	"score"	"name"	"smiles"	"xlogp"	"pubchemids"	"links"
+"19"	"2_tmpspec"	""	"0.0"	"RWRDLPDLKQPQOW"	"InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2"	"C4H9N"	"1"	"-136.14546214244544"	"19"	"C1CCNC1"	""	"31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985"	"HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)"
+"19"	"4_tmpspec"	""	"0.0"	"IAZDPXIOMUYVGZ"	"InChI=1S/C2H6OS/c1-4(2)3/h1-2H3"	"C2H6OS"	"1"	"-86.79174845072117"	"19"	"CS(=O)C"	""	"679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578"	"HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/historic_input.msp	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,61 @@
+NAME: 1
+PRECURSORMZ: 70.0658950805664
+Comment:
+Num Peaks: 8
+50.4781379699707	3487.4296875
+51.0193099975586	3390.96948242188
+53.0031509399414	10011.958984375
+53.5898513793945	4252.7880859375
+54.3787727355957	3541.5107421875
+69.0455169677734	9650.0107421875
+70.0660934448242	37168.609375
+82.9910659790039	4077.36694335938
+
+NAME: 2
+PRECURSORMZ: 72.0815277099609
+Comment:
+Num Peaks: 6
+51.773567199707	818.313903808594
+54.0346794128418	1247.91137695312
+54.6847991943359	967.616882324219
+56.050350189209	1780.01037597656
+58.4994125366211	975.196228027344
+72.0814056396484	1660.50390625
+
+NAME: 5
+PRECURSORMZ: 76.0400390625
+Comment:
+Num Peaks: 4
+53.2376174926758	3224.35571289062
+60.3291244506836	3193.19482421875
+73.7529830932617	3305.61401367188
+82.5309600830078	2965.41772460938
+
+NAME: 9
+PRECURSORMZ: 79.0218811035156
+Comment:
+Num Peaks: 5
+59.1125831604004	67799.1953125
+59.9673652648926	345613.1875
+62.9906845092773	117693.296875
+63.9986686706543	1585970.25
+80.5974655151367	66719.4609375
+
+NAME: 11
+PRECURSORMZ: 79.9903564453125
+Comment:
+Num Peaks: 3
+51.6917915344238	584.212829589844
+53.0398750305176	649.807922363281
+97.3154754638672	596.341003417969
+
+NAME: 19
+PRECURSORMZ: 86.0606536865234
+Comment:
+Num Peaks: 5
+53.0031509399414	29580.330078125
+55.3490409851074	4989.64990234375
+61.990592956543	4089.9619140625
+63.2290992736816	4168.97412109375
+67.6647109985352	5392.48779296875
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/invalid_adduct.msp	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,13 @@
+NAME:  MZ:70.0659 | RT:1483 | XCMS_group:1 | file:1 | scan:NA | pid:NA
+PRECURSORMZ: 70.0658950805664
+ADDUCT: [M+INVALID_ADDUCT]+
+Comment:
+Num Peaks: 8
+50.4781379699707	3487.4296875	4.61
+51.0193099975586	3390.96948242188	4.49
+53.0031509399414	10011.958984375	13.25
+53.5898513793945	4252.7880859375	5.63
+54.3787727355957	3541.5107421875	4.69
+69.0455169677734	9650.0107421875	12.77
+70.0660934448242	37168.609375	49.18
+82.9910659790039	4077.36694335938	5.39
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sirus_csifingerid_test1.tsv	Wed Feb 05 10:41:48 2020 -0500
@@ -0,0 +1,4 @@
+UID	InChIkey2D	InChI	molecularFormula	Rank	Score	Name	smiles	xlogp	pubchemids	links
+2	RWRDLPDLKQPQOW	InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2	C4H9N	1	-136.14546214244544	Azolidine	C1CCNC1		31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985	HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)
+UID	InChIkey2D	InChI	molecularFormula	Rank	Score	Name	smiles	xlogp	pubchemids	links
+9	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	C2H6OS	1	-86.79174845072117	Demasorb	CS(=O)C		679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)