Repository 'metfrag'
hg clone https://toolshed.g2.bx.psu.edu/repos/computational-metabolomics/metfrag

Changeset 0:fd5c0b39569a (2020-02-05)
Next changeset 1:9ee2e2ceb2c9 (2020-03-19)
Commit message:
"planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit e20ce56f23d9fe30df64542ece2295d654ca142d"
added:
UNPD_DB.inchikeys.txt
config.ini
macros.xml
metfrag.py
metfrag.xml
test-data/A06_spec_trees_merged_pls.msp
test-data/A06_spec_trees_non_merged_pls.msp
test-data/RP022611.tabular
test-data/RP022611.txt
test-data/RP022611_all_col.tabular
test-data/RP022611_suspect.txt
test-data/RP022611_suspect_default.txt
test-data/demo_db.csv
test-data/dup_check.txt
test-data/generic_format.msp
test-data/invalid_adduct.msp
test-data/invalid_adduct_result.txt
test-data/massbank_format.txt
test-data/metfrag_massbank.tabular
test-data/metfrag_msp.tabular
test-data/winter_pos.msp
test-data/winter_pos.tabular
b
diff -r 000000000000 -r fd5c0b39569a UNPD_DB.inchikeys.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/UNPD_DB.inchikeys.txt Wed Feb 05 12:30:06 2020 -0500
b
b'@@ -0,0 +1,207491 @@\n+AAANZTDKTFGJLZ\n+AADZLWJXMBAKKE\n+AAHHASSEIOXLIB\n+AAJDHOXHRGMSLB\n+AAKKARXDZDUXHU\n+AALUKTCMUIGJEG\n+AALYQEIWIWMEAJ\n+AAMXRZJYSRMQQF\n+AANLEWIAEUDQBM\n+AAPGEKMDLXBUBL\n+AARQIJVWMWVDNS\n+AAUDLZNGLZWUIC\n+AAUMBOCDCYMWBT\n+AAVGOORHZRJMPA\n+AAVRFXAMWONIOE\n+AAYOMAGHACYWJR\n+AAYYAURDUHQZGK\n+AAZRLGLWPJJDGC\n+AAZRRHBFRPIGGY\n+ABBIOIHDQBNQFR\n+ABEFPCRGBOFMDC\n+ABEFZRUFMKMACI\n+ABFQREMAZQPJLS\n+ABGLVXROVAEWNL\n+ABIDBJGCXGVVTH\n+ABLLSLSHQGLOPV\n+ABLNQMKQYJQVRR\n+ABPFLYNCUOPTSB\n+ABQDDNHUZOOWMZ\n+ABSJDCQHYMKVLW\n+ABVGARAGGTXCEH\n+ABVOEFOLDBPXHB\n+ABXZFOZVZWDICN\n+ABZLULXOKFPLMY\n+ACBLGYUNJSYJPY\n+ACBMIOXLRKBKCN\n+ACCYCJOHUMRMMV\n+ACDWEIMFJUNXIU\n+ACFIXJIJDZMPPO\n+ACIODEMNMSESPB\n+ACJAQJHYAGLMOL\n+ACOKXVSLVQCZRD\n+ACPQNZRBESJWHB\n+ACRMTHSZHIGDGF\n+ACSFOIGNUQUIGE\n+ACTKFDMFHYIKRY\n+ADAVRJAERLROQG\n+ADAXNLYFMSSRDG\n+ADBMMSFXIFGNAX\n+ADCWWONRVSXNJV\n+ADDCNOCQPWDJSR\n+ADECHVGDPIBMSZ\n+ADEVBUKPYQGFGW\n+ADFAUEYXQCOXBX\n+ADFIQZBYNGPCGY\n+ADGKVPFTEYUPDW\n+ADNDAPRXIIPWCD\n+ADONXPDCJFWYAQ\n+ADOYBIJMQNENDD\n+ADWFEADZGIHPDE\n+ADWNZQSKPRAHTJ\n+ADXNIJXMFVSXDQ\n+ADYQZVXAZYQCAZ\n+ADZJHBDSFDCAED\n+AEDBOXCTQDATMF\n+AEHNGXKQQZDWGN\n+AEHNYMQMGXWKFF\n+AEKWITXEVIHECI\n+AELKPKGSENHRJJ\n+AEMHLIHHQUOOGP\n+AEOXGHCEDBMESQ\n+AEPJQKVEJDKGIZ\n+AEQDXSFIHGWHDV\n+AERCZABCWOQOSG\n+AERWGAIGOLLQBV\n+AESQFXGSQACQRK\n+AEVZZALQJYKVBB\n+AFGBEDQMTFPVJS\n+AFGMWONXXNDGGE\n+AFHWRWXCMNWXMK\n+AFRGWGGHJYMSDU\n+AFRNHJDBERWLNW\n+AFTLDBVPRCDOJK\n+AFWVQOPSOKQLPI\n+AFXPJFXBBQEMAB\n+AGABNGOXUSXQDD\n+AGGKSJHXZHNDNY\n+AGHYJAIHRCTPKR\n+AGILGFCOHSGLIT\n+AGKHPQOHHBHUDT\n+AGKYIOIIEPFTEV\n+AGOVRGTVIPXJEC\n+AGPBUGPNAKWRAP\n+AGPYECDHKQOXNE\n+AGVBOCPGXNSSLL\n+AGXFGQBFMFJFCD\n+AGYJKFFPLORZSS\n+AGZLPSPSDWJGIW\n+AHBDVZCKXAMRQQ\n+AHEDZGDPQGHTSN\n+AHGBXGUKDIOFOR\n+AHIQFGJYCGHEEQ\n+AHOKGTBIWXGZNE\n+AHVFQRYKZZMJTC\n+AHWUIMDBTDTTRI\n+AHYOMNWKYGMYMB\n+AHYSWFBBMLPXFU\n+AIAOQTBKFWFIJZ\n+AIBOHNYYKWYQMM\n+AIDVIDPTMJXLQO\n+AIHACJYGATZDDL\n+AIHRLDOFCXDRCZ\n+AIHZGBQMMTZCDW\n+AIJMQKVEPZZTBT\n+AILCSCQIQZTQJR\n+AIMMTSCLXAKBFQ\n+AIQ
MLBKBQCVDEY\n+AIUGIYMSXFPBFA\n+AIWCFAFGTGFHKV\n+AIYRJNOLOMUWJR\n+AIYUBYPLDAUXRO\n+AJAXYTHTWGIEHL\n+AJCUWDKHNSBHGJ\n+AJEDMWQPEGCRLJ\n+AJFCALALFFWFPQ\n+AJHGAFZLDMJSTN\n+AJIPIJNNOJSSQC\n+AJJJRSKTRLDSEB\n+AJMKPZXRIKVSAQ\n+AJMQODPAUXMBSS\n+AJNAQQPNRPQYTI\n+AJPNSKWVBMZHRH\n+AJSGWTLZEUBGFV\n+AJTWDECGQXIMQX\n+AJWRNFIZKHPOHC\n+AJZJWTXGQXUPNB\n+AJZZJDXLAWTANY\n+AKASWINDKIEEBO\n+AKCWKACKPGSALW\n+AKEUNCKRJATALU\n+AKFASBOHJPPIRI\n+AKFIXMYXISUTAF\n+AKJQUGNUJATZNP\n+AKKGSBVONASWJC\n+AKLPKLNHHIOFBG\n+AKLSXLVJISBMOO\n+AKLZJACFWIKHEV\n+AKMAERBYSDYONJ\n+AKNYLHJSMLROIV\n+AKQKAMNRTWTCFB\n+AKRJETUMTUDQFT\n+ALAJEHHXYIGXNY\n+ALCNBPLBBDQOQU\n+ALEADBNEUMOAQO\n+ALEFWKBEHYAYBG\n+ALERZNQPBWWLMW\n+ALEXXDVDDISNDU\n+ALGKOJZNIQBASA\n+ALHHAXMXDXKBQY\n+ALHUZKCOMYUFRB\n+ALIAPFPESXJZKP\n+ALIDLIQYGLJLJE\n+ALIFBGCDGDBHLU\n+ALIUJDACFKDRMB\n+ALLCCEWBLKBKBO\n+ALQFMXSFTOUFST\n+ALRFYJWUVHBXLV\n+ALSTYHKOOCGGFT\n+ALSTZYUHBBCSGR\n+ALTBXZDGDFVFGO\n+ALVDKZRKCLTXIO\n+ALWIMEGFKSOEQI\n+ALYLMNAEGJISEJ\n+ALZIGHRHGDHKPD\n+AMAJESHGZRCEHZ\n+AMBNTPYCDHUUJH\n+AMHXSCQUXNYYCR\n+AMIPWEKLJVRITL\n+AMLDFCQNEGUMOX\n+AMMYVOPRVWBAML\n+AMNAZJFEONUVTD\n+AMQBTDIMCBUNRJ\n+AMRVXYRVPBLMCN\n+AMSCMASJCYVAIF\n+AMTNJYJZXRRIJG\n+AMVCEZYBLCNDSF\n+AMVHSCBIZLVSPF\n+AMZPOLFWIKSMLX\n+ANASGKSCMVYISY\n+ANAWXDDUSIXATR\n+ANBZFYYSBLEBRA\n+ANEHLYCXABAELI\n+ANEOWDWLJGVECN\n+ANFCLBKOBMSSLU\n+ANFRDTPJAZBUTA\n+ANIGIRFDJFSBKE\n+ANIQKNXCSIWOOD\n+ANKNKXVIQSJVBN\n+ANQMUIJXUXKAPB\n+ANUFIKIFSCIVQK\n+ANUPVXRMSNOEBS\n+ANVAOWXLWRTKGA\n+ANVKISRMXPTBPA\n+AOAPCDXPLWGVGI\n+AOFZTNZDGDRGLR\n+AOIMAYBIPUIYPH\n+AOJYFODHRHWWEN\n+AOKAQJSFMVTCOD\n+AOLKDXFBABOMHP\n+AOLLRBVQJZTTPF\n+AOLVYXTXTDRBOS\n+AOQMHHBFIJHEPD\n+AOSFEHKJTMLTGH\n+AOVBTSUINBPCOS\n+AOZKTJDXDYSFAE\n+APBDVJVONHAIGM\n+APBGEUPGIRVPDX\n+APBXZPBHXNENAA\n+APDXFEASFJBTAO\n+APEPCFIVJSRFRT\n+APEWVGSHJHRBOP\n+APFBWMGEGSELQP\n+APFQRQZNBLNKDL\n+APGGFZJGNASKNI\n+APIHCQSTUUDSHL\n+APISCUCTJAAPNB\n+APIWMBUGOVSRBI\n+APIXJZVCGREBPH\n+APJXMEVQVACVMD\n+APLKWZASYUZSBL\n+APMKESKZWHNIDJ\n+APTJMRSPVFSVHZ\n+APUHCCTYZGXKTG\n+AQBDNVSAYLLLQP
\n+AQCWDMYAXGZHOF\n+AQDGYKAHAWRIDJ\n+AQFDLPKDGJJOGK\n+AQJGDWRSGSISRW\n+AQLWXOUEHFQEHZ\n+AQOMLMMSAVDODV\n+AQQLNCPOJXZEMK\n+AQTXQYBXCGBTNS\n+AQWASCRJKUJRIP\n+AQWPITGEZPPXTJ\n+AQXLVXFWCPCGOX\n+AQXTXYPAJZSNIZ\n+AQXXXWQVIBWEMN\n+AQYMPYYJWAIFLS\n+ARBHXJXXV'..b'YBBXPJPYMJIXBD\n+YBBZWBTVSIDANF\n+YBDUMXZBKBTNGS\n+YBDUXZKWDIUNSG\n+YBLJHUNODSZAQU\n+YBPCEJJDLIQWTB\n+YBTHHPIEZVFXLZ\n+YCBMXIIYHMNHDU\n+YCBSMEKEDOHEQI\n+YCHHZTNKPZYPLQ\n+YCHMIHRTWFOENV\n+YCIHCWCXBRFWKZ\n+YCIUCIBXUZOYMY\n+YCNKTCKWWJKFGH\n+YCWBXVBPBPUAQF\n+YDDGKXBLOXEEMN\n+YDFMRHVTUVJMHS\n+YDGWQGCHRDUSIY\n+YDHWOAQKEXVUAU\n+YDKWKCQTFWTPEG\n+YDOIHIWSLMXTHV\n+YDPXEIXLONTGCJ\n+YDTKGRYLDMLMJF\n+YDUFVPKWFQXSAZ\n+YDXRAKCWBNTQEB\n+YEWSFUFGMDJFFG\n+YEXYWLVVYFJMOU\n+YFDBMIHFHLSZBY\n+YFRYJFMFQOBOSY\n+YFXSGYVMBQHZCY\n+YGBRAWBXISPFCJ\n+YGCNNWSWMYDUKD\n+YGILXMANNHJYJZ\n+YGTMYGLEXKXTMS\n+YHCNATPUYZXDQB\n+YHGVURGGBNMVRL\n+YHNNPKUFPWLTOP\n+YIFMSNBQADOPBX\n+YIHMBRXHQJWKMY\n+YIHMXHFAOIEYBW\n+YIYDRDIIVCKVSU\n+YJBZWKGFYVMPDY\n+YJCNIIKXRAQFAR\n+YJHPUIVTIUFFBX\n+YJHUGXJKGUZNQB\n+YJOQPCPEUGDGFS\n+YJQKJSVCJWXGTP\n+YJSQLUZICYKYBA\n+YKAHIJVOBFAVKQ\n+YKCVIZVITGNXAR\n+YKJBGAMSFYITAF\n+YKKFSDKZBGDWDC\n+YLAMTMNJXPWCQN\n+YLHIHCIAPXKKGH\n+YLLQOTIGQVCOEH\n+YLTWYAXWDLZZCU\n+YLUOVOKBMSLYGX\n+YMFJMBLNVJHUFO\n+YMOUBIDEGPSLHS\n+YMUOZXZDDBRJEP\n+YOFGZLBDSWEFIM\n+YOKYSHXYBLMBOT\n+YOQHLDOEJYEQRA\n+YORRNUUCBAOIGW\n+YOTJBGVSVFKNEU\n+YOZOWEBNQFOSGC\n+YPCMOCVARABHRT\n+YPJDNPTZPPPTEV\n+YPPPTXCDCPVIBS\n+YPRJBGUXDFJVIG\n+YPUPRVWRYDPGCW\n+YPVGGTXGOCDLRH\n+YPWHZCPMOQGCDQ\n+YPWSLBHSMIKTPR\n+YQCOGGGDJXBMBU\n+YQDKULBMDMPFLH\n+YQJGNDLMPFXISW\n+YQLKOOGRXMGOOT\n+YQMNZWBBBKGAKW\n+YQMWQSMYVPLYDI\n+YQQUILZPDYJDQJ\n+YQWMVXCSRQHQOE\n+YQXGMCISEUTANJ\n+YRHFOCFOBZVGPU\n+YRHVBEUEHIVQFL\n+YROWDXFQJCHNIN\n+YRROXDJRDFXNKD\n+YSCJAYPKBYRXEZ\n+YSJWHDZQHDVBNW\n+YSMQGBBJPVUXEX\n+YSOWNOZZPRSGJI\n+YSPMZADUIBWNLR\n+YSRYKTUWDVLRLA\n+YTHNLQXRJHZHMM\n+YTWQDQXQXNAHQJ\n+YTXBUMDWPQREKF\n+YUAFWDJFJCEDHL\n+YUFOLFJAYLDZGK\n+YUGARAKKPWYQJE\n+YUIGSRGRYOBFRF\n+YUKCLPPRYNXRAF\n+YUYZREYFRPXMSZ\n+YVGCHYVFHLT
ZIM\n+YVHRNKWYKHUPFK\n+YVIVANDTNNUXRH\n+YVMAYTYEFBTXFR\n+YWPWZARSDRFVFD\n+YWQJKAJFLCNHPB\n+YWTDTKWYDRWAKX\n+YXPSTSCHAVSCKS\n+YXTMOGKDCHGEMG\n+YXTPTAIGIJPTGR\n+YXYBPDUPPKSWRM\n+YYFLJGNYBJWKAO\n+YYHGQOLZRYICRS\n+YYIUHLPAZILPSG\n+YYJRTJYCOMIDIC\n+YYNAEWANGMBTOJ\n+YYPUQBCQRSMSKU\n+YYWGABLTRMRUIT\n+YYWUABJYAOCACI\n+YYZQPDQBCGZPBO\n+YZNUQTVBWSAEBN\n+YZYZCVQQKYLXMA\n+ZAHCAJSTBYCUIW\n+ZAIDBNLLZLYWGI\n+ZAIGREUAQVJOJF\n+ZAJQJIRKXGWHAG\n+ZALJPNQAPQYGQQ\n+ZANCPCGYQSRSLK\n+ZANZUZKNPKKTQM\n+ZAOVUVVHBJNSPP\n+ZBFSMPOCZMVCMC\n+ZBLQOEZPBPYJFU\n+ZBNDWJVBOJZWDW\n+ZBNYDADZMLZTAX\n+ZCDXFTYPYJCBBM\n+ZCEYOMXMDDYIQP\n+ZCHAUSWCWZYCNG\n+ZCMJUAGNOJTZBJ\n+ZCNGYPFWHGRPSJ\n+ZCTBULGQERTBEG\n+ZDFUASMRJUVZJP\n+ZDOLIOLHSICTGE\n+ZECWSTUXOFQBRU\n+ZELUXPWDPVXUEI\n+ZENRAMDHYOLFQK\n+ZEOKDWPZNMUQNT\n+ZEQDLXBOAUGOBB\n+ZEQWNCCUXZWGMJ\n+ZESJTWVSXGZYTD\n+ZETBADSRSFPSFF\n+ZEWFPWKROPWRKE\n+ZFJHEGLUSLQBNU\n+ZFLNGJFWUHGMKC\n+ZFOBGKZKFOAYTR\n+ZFUYDSOHVJVQNB\n+ZGEFAXVVRGCCEZ\n+ZGJXYRPLBATJNC\n+ZGRAFMJBVGBGRY\n+ZGYAWGWSDGHMHA\n+ZHKRZTOJPXQAJI\n+ZHQNPYUTOBOVDN\n+ZHSYALRVBJDBNY\n+ZHUNNEPKAYTEID\n+ZHWHQGPOENZPRA\n+ZIEFIDANMCIAOW\n+ZIGRABNSHCKEKX\n+ZIRVZLYXPGUBLW\n+ZIUKDKVKRXUKAP\n+ZJJLCEATKJINRL\n+ZKBGKWZSOPPDSD\n+ZKCSFQZJDZSMCH\n+ZKCUSCDHGAMOSP\n+ZKEMVUBEPDXJPL\n+ZKILOUUCRGDELO\n+ZLJJLKHHPMWSAQ\n+ZLKWMOWLCYLBCC\n+ZLOYHDGVJFCYJK\n+ZLSVDVPVDVZXQC\n+ZLTRKVJQLZPSSD\n+ZMCFCLSHQFQSGT\n+ZMDNPAXGEYVRDA\n+ZMFVAIFXJWEOMH\n+ZMKJASJBEHWBSJ\n+ZMPRZZMCSIVQNO\n+ZNOWAPXQORXMEX\n+ZNTWWRJQOSQMFP\n+ZOBAYWQPWZMJPP\n+ZOFGKJCFKDDCBO\n+ZOJSSXJDOHFOTI\n+ZOOUWANGAQIDTE\n+ZOPQSLIVXIRLGJ\n+ZORBIEXXHYHNFM\n+ZOTJLEITPRLHBN\n+ZOVBLVRDCOPISO\n+ZPCROTWCQCXAOO\n+ZPDDZABRYUKCAW\n+ZPJGTPAAEPXBQT\n+ZPNVTCIACICSQA\n+ZPPZJVCHAOWCGT\n+ZPQHNIHJSIZREW\n+ZPRURHACUXSVGY\n+ZPVLUTBGTWEMGV\n+ZPWBUZCVOZVOLS\n+ZPZUGHMDLKWJOU\n+ZQAHVKVXUSYIBF\n+ZQGMKFHEDFDRLZ\n+ZQLBCAUKNYXILZ\n+ZQWAPPQJKIEPOV\n+ZRCODBBSMJOEAY\n+ZRNXEMIDBIPJDC\n+ZROMRLOBUYCHGL\n+ZRPVSFCWSDCKJC\n+ZSBXGIUJOOQZMP\n+ZSCSHJBZNPXUQJ\n+ZSQHZTSVIIYJNV\n+ZSTCCLUBWBHJMP\n+ZSVGHBSAINKGKE\n+ZSWDGBBFVZMLRN\n+ZSXAL
DGJPAPDLM\n+ZSYPWSSGRVZENH\n+ZSZBLSHAQNIIEE\n+ZSZJXLIMGWEGIK\n+ZTEVTZPJEDLPIC\n+ZTMMKJBOOFCGTD\n+ZUOFIYSMCLKBAS\n+ZVFMDVFPBVFGPG\n+ZVFVTBSWJWONEI\n+ZVGVQWPURHXQNI\n+ZVMLLPSSQZSZOA\n+ZVTBDFVDOBOUEV\n+ZVUXDKNPHFTWRA\n+ZVYOCLMPZOESDV\n+ZVYTUNZEFGUQNZ\n+ZWAMUWXPVAELPG\n+ZWBQQMVUSLJQFA\n+ZWEVIFVSDFJDJS\n+ZWTGJCOSOVVWJL\n+ZWYLCRROCKGHBE\n+ZXUKAZPCOMPMHC\n+ZXUKGOMRBKEKEX\n+ZXXUDRYOCINBKT\n+ZYAIBKURUKIHGU\n+ZYSSIHVSPYQDOA\n+ZYTJBCOGBFUTOP\n+ZYWHCDRAABRVKM\n+ZYXVOZNURJLMFP\n+ZZAJNFHIGVVCFT\n+ZZCMFFGGLCGPHY\n'
b
diff -r 000000000000 -r fd5c0b39569a config.ini
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/config.ini Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,9 @@
+[MetChem]
+LocalMetChemDatabase: metchem
+LocalMetChemDatabasePortNumber: 5432
+LocalMetChemDatabaseUser: metchem
+LocalMetChemDatabasePassword: metchem
+
+
+
+
b
diff -r 000000000000 -r fd5c0b39569a macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,20 @@
+<macros> <!-- Macros imported by metfrag.xml: two text validators and the MetFrag scoring parameters. -->
+    <xml name="text-alphanumeric-regex-validator"> <!-- Non-empty value made up of letters and digits only. -->
+        <validator type="regex" message="Value may only include alphanumeric characters">^[A-Za-z0-9]+$</validator>
+    </xml>
+    <xml name="text-alphanumeric-comma-regex-validator"> <!-- Same as above, with commas also allowed. -->
+        <validator type="regex" message="Value may only include alphanumeric characters and commas">^[A-Za-z0-9,]+$</validator>
+    </xml> <!-- Patterns are anchored with ^...$ because Galaxy's regex validator only matches from the start of the value. -->
+    <xml name="metfrag_scoring" token_fragmenterscore='True' token_offlinemetfusionscore='True'
+         token_suspectlistscore="True" token_weights="0.4,0.6,1.0"> <!-- Tokens: which score types are pre-selected, and the default weights string. -->
+        <param argument="--MetFragScoreTypes" type="select" multiple="true" label="MetFrag Score Types"
+                       help="The type of scores MetFrag is calculating" >
+           <option value="FragmenterScore" selected="@FRAGMENTERSCORE@">FragmenterScore</option>
+           <option value="OfflineMetFusionScore" selected="@OFFLINEMETFUSIONSCORE@">OfflineMetFusionScore</option>
+           <option value="SuspectListScore" selected="@SUSPECTLISTSCORE@">SuspectListScore</option>
+        </param>
+        <param name="MetFragScoreWeights" type="text" value="@WEIGHTS@" label="MetFrag Score Weights"
+                       help="The weights of the different score types, separated with a comma and without
+                       whitespaces. 1.0 means 100 percent." />
+    </xml>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r fd5c0b39569a metfrag.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/metfrag.py Wed Feb 05 12:30:06 2020 -0500
[
b'@@ -0,0 +1,520 @@\n+from __future__ import absolute_import, print_function\n+\n+import ConfigParser\n+import argparse\n+import csv\n+import glob\n+import multiprocessing\n+import os\n+import re\n+import shutil\n+import sys\n+import tempfile\n+from collections import defaultdict\n+\n+import six\n+\n+print(sys.version)\n+\n+parser = argparse.ArgumentParser()\n+parser.add_argument(\'--input_pth\')\n+parser.add_argument(\'--result_pth\', default=\'metfrag_result.csv\')\n+\n+parser.add_argument(\'--temp_dir\')\n+parser.add_argument(\'--polarity\', default=\'pos\')\n+parser.add_argument(\'--minMSMSpeaks\', default=1)\n+\n+parser.add_argument(\'--MetFragDatabaseType\', default=\'PubChem\')\n+parser.add_argument(\'--LocalDatabasePath\', default=\'\')\n+parser.add_argument(\'--LocalMetChemDatabaseServerIp\', default=\'\')\n+\n+parser.add_argument(\'--DatabaseSearchRelativeMassDeviation\', default=5)\n+parser.add_argument(\'--FragmentPeakMatchRelativeMassDeviation\', default=10)\n+parser.add_argument(\'--FragmentPeakMatchAbsoluteMassDeviation\', default=0.001)\n+parser.add_argument(\'--NumberThreads\', default=1)\n+parser.add_argument(\'--UnconnectedCompoundFilter\', action=\'store_true\')\n+parser.add_argument(\'--IsotopeFilter\', action=\'store_true\')\n+\n+parser.add_argument(\'--FilterMinimumElements\', default=\'\')\n+parser.add_argument(\'--FilterMaximumElements\', default=\'\')\n+parser.add_argument(\'--FilterSmartsInclusionList\', default=\'\')\n+parser.add_argument(\'--FilterSmartsExclusionList\', default=\'\')\n+parser.add_argument(\'--FilterIncludedElements\', default=\'\')\n+parser.add_argument(\'--FilterExcludedElements\', default=\'\')\n+parser.add_argument(\'--FilterIncludedExclusiveElements\', default=\'\')\n+\n+parser.add_argument(\'--score_thrshld\', default=0)\n+parser.add_argument(\'--pctexplpeak_thrshld\', default=0)\n+parser.add_argument(\'--schema\')\n+parser.add_argument(\'--cores_top_level\', default=1)\n+parser.add_argument(\'--chunks\', 
default=1)\n+parser.add_argument(\'--meta_select_col\', default=\'name\')\n+parser.add_argument(\'--skip_invalid_adducts\', action=\'store_true\')\n+\n+parser.add_argument(\'--ScoreSuspectLists\', default=\'\')\n+parser.add_argument(\'--MetFragScoreTypes\',\n+                    default="FragmenterScore,OfflineMetFusionScore")\n+parser.add_argument(\'--MetFragScoreWeights\', default="1.0,1.0")\n+\n+args = parser.parse_args()\n+print(args)\n+\n+config = ConfigParser.ConfigParser()\n+config.read(\n+    os.path.join(os.path.dirname(os.path.abspath(__file__)), \'config.ini\'))\n+\n+if os.stat(args.input_pth).st_size == 0:\n+    print(\'Input file empty\')\n+    exit()\n+\n+# Create temporary working directory\n+if args.temp_dir:\n+    wd = args.temp_dir\n+else:\n+    wd = tempfile.mkdtemp()\n+\n+if os.path.exists(wd):\n+    shutil.rmtree(wd)\n+    os.makedirs(wd)\n+else:\n+    os.makedirs(wd)\n+\n+######################################################################\n+# Setup regular expressions for MSP parsing dictionary\n+######################################################################\n+regex_msp = {}\n+regex_msp[\'name\'] = [r\'^Name(?:=|:)(.*)$\']\n+regex_msp[\'polarity\'] = [r\'^ion.*mode(?:=|:)(.*)$\',\n+                         r\'^ionization.*mode(?:=|:)(.*)$\',\n+                         r\'^polarity(?:=|:)(.*)$\']\n+regex_msp[\'precursor_mz\'] = [r\'^precursor.*m/z(?:=|:)\\s*(\\d*[.,]?\\d*)$\',\n+                             r\'^precursor.*mz(?:=|:)\\s*(\\d*[.,]?\\d*)$\']\n+regex_msp[\'precursor_type\'] = [r\'^precursor.*type(?:=|:)(.*)$\',\n+                               r\'^adduct(?:=|:)(.*)$\',\n+                               r\'^ADDUCTIONNAME(?:=|:)(.*)$\']\n+regex_msp[\'num_peaks\'] = [r\'^Num.*Peaks(?:=|:)\\s*(\\d*)$\']\n+regex_msp[\'msp\'] = [r\'^Name(?:=|:)(.*)$\']  # Flag for standard MSP format\n+\n+regex_massbank = {}\n+regex_massbank[\'name\'] = [r\'^RECORD_TITLE:(.*)$\']\n+regex_massbank[\'polarity\'] = 
[r\'^AC\\$MASS_SPECTROMETRY:\\s+ION_MODE\\s+(.*)$\']\n+regex_massbank[\'precursor_mz\'] = [\n+    r\'^MS\\$FOCUSED_ION:\\s+PRECURSOR_M/Z\\s+(\\d*[.,]?\\d*)$\']\n+regex_massbank[\'precursor_type\'] = [\n+    r\'^MS\\$FOCUSED_ION:\\s+PRECURSOR_TYPE\\s+(.*)$\']\n+regex_massbank[\'num_peaks\'] = [r\'^PK\\$NUM_PEAK:\\s+(\\d*)\']\n'..b'nd filter the output\n+######################################################################\n+# outputs might have different headers. Need to get a list of all the\n+# headers before we start merging the files\n+# outfiles = [os.path.join(wd, f) for f in glob.glob(os.path.join(wd,\n+# "*_metfrag_result.csv"))]\n+outfiles = glob.glob(os.path.join(wd, "*_metfrag_result.csv"))\n+\n+if len(outfiles) == 0:\n+    print(\'No results\')\n+    sys.exit()\n+\n+headers = []\n+c = 0\n+for fn in outfiles:\n+    with open(fn, \'r\') as infile:\n+        reader = csv.reader(infile)\n+        if sys.version_info >= (3, 0):\n+            headers.extend(next(reader))\n+        else:\n+            headers.extend(reader.next())\n+        # check if file has any data rows\n+        for i, row in enumerate(reader):\n+            c += 1\n+            if i == 1:\n+                break\n+\n+# if no data rows (e.g. 
matches) then do not save an\n+# output and leave the program\n+if c == 0:\n+    print(\'No results\')\n+    sys.exit()\n+\n+additional_detail_headers = [\'sample_name\']\n+for k, paramd in six.iteritems(paramds):\n+    additional_detail_headers = list(set(\n+        additional_detail_headers + list(paramd[\'additional_details\'].keys())))\n+\n+# add inchikey if not already present (missing in metchem output)\n+if \'InChIKey\' not in headers:\n+    headers.append(\'InChIKey\')\n+\n+headers = additional_detail_headers + sorted(list(set(headers)))\n+\n+# Sort files nicely\n+outfiles.sort(\n+    key=lambda s: int(re.match(r\'^.*/(\\d+)_metfrag_result.csv\', s).group(1)))\n+\n+print(outfiles)\n+\n+# merge outputs\n+with open(args.result_pth, \'a\') as merged_outfile:\n+    dwriter = csv.DictWriter(merged_outfile, fieldnames=headers,\n+                             delimiter=\'\\t\', quotechar=\'"\')\n+    dwriter.writeheader()\n+\n+    for fn in outfiles:\n+\n+        with open(fn) as infile:\n+            reader = csv.DictReader(infile, delimiter=\',\', quotechar=\'"\')\n+            for line in reader:\n+                bewrite = True\n+                for key, value in line.items():\n+                    # Filter when no MS/MS peak matched\n+                    if key == "ExplPeaks":\n+                        if float(args.pctexplpeak_thrshld) > 0 and \\\n+                                "NA" in value:\n+                            bewrite = False\n+                    # Filter with a score threshold\n+                    elif key == "Score":\n+                        if float(value) <= float(args.score_thrshld):\n+                            bewrite = False\n+                    elif key == "NoExplPeaks":\n+                        nbfindpeak = float(value)\n+                    elif key == "NumberPeaksUsed":\n+                        totpeaks = float(value)\n+                # Filter with a relative number of peak matched\n+                try:\n+                    
pctexplpeak = nbfindpeak / totpeaks * 100\n+                except ZeroDivisionError:\n+                    bewrite = False\n+                else:\n+                    if pctexplpeak < float(args.pctexplpeak_thrshld):\n+                        bewrite = False\n+\n+                # Write the line if it pass all filters\n+                if bewrite:\n+                    bfn = os.path.basename(fn)\n+                    bfn = bfn.replace(".csv", "")\n+                    line[\'sample_name\'] = paramds[bfn][\'SampleName\']\n+                    ad = paramds[bfn][\'additional_details\']\n+\n+                    if args.MetFragDatabaseType == "MetChem":\n+                        # for some reason the metchem database option does\n+                        # not report the full inchikey (at least in the Bham\n+                        # setup. This ensures we always get the fully inchikey\n+                        line[\'InChIKey\'] = \'{}-{}-{}\'.format(line[\'InChIKey1\'],\n+                                                             line[\'InChIKey2\'],\n+                                                             line[\'InChIKey3\'])\n+\n+                    line.update(ad)\n+                    dwriter.writerow(line)\n'
b
diff -r 000000000000 -r fd5c0b39569a metfrag.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/metfrag.xml Wed Feb 05 12:30:06 2020 -0500
[
b'@@ -0,0 +1,473 @@\n+<tool id="metfrag" name="MetFrag" version="2.4.5+galaxy1">\n+    <description>\n+        in silico fragmentor for compound annotation of mass spectrometry fragmentation spectra\n+    </description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <requirements>\n+        <requirement type="package" version="2.4.5">metfrag</requirement>\n+    </requirements>\n+    <stdio>\n+        <regex match="Cannot allocate memory"\n+           source="stderr"\n+           level="fatal_oom"\n+           description="Out of memory error occurred" />\n+    </stdio>\n+    <command detect_errors="exit_code">\n+    <![CDATA[\n+        python \'$__tool_directory__/metfrag.py\'\n+            --input_pth \'$input\'\n+            --result_pth \'$results\'\n+             --temp_dir \'./temp/\'\n+\n+            --cores_top_level \\${GALAXY_SLOTS:-4}\n+\n+            --MetFragDatabaseType \'$db_select.MetFragDatabaseType\'\n+\n+            #if $db_select.MetFragDatabaseType == \'LocalCSV\':\n+                --LocalDatabasePath \'$db_select.LocalDatabasePath\'\n+            #elif  $db_select.MetFragDatabaseType == \'MetChem\':\n+                --LocalMetChemDatabaseServerIp \'$db_select.LocalMetChemDatabaseServerIp\'\n+            #end if\n+\n+            --DatabaseSearchRelativeMassDeviation $DatabaseSearchRelativeMassDeviation\n+            --FragmentPeakMatchRelativeMassDeviation $FragmentPeakMatchRelativeMassDeviation\n+            --FragmentPeakMatchAbsoluteMassDeviation $FragmentPeakMatchAbsoluteMassDeviation\n+            --polarity \'$polarity\'\n+\n+            --MetFragScoreTypes \'$suspectlist.MetFragScoreTypes\'\n+            --MetFragScoreWeights \'$suspectlist.MetFragScoreWeights\'\n+\n+            #if $suspectlist.suspectselector == \'includesuspects\':\n+                #if $suspectlist.includesuspects_default_cond:\n+                    --ScoreSuspectLists \'$__tool_directory__/UNPD_DB.inchikeys.txt\'\n+                
#else\n+                    --ScoreSuspectLists \'$suspectlist.includesuspects_custom_cond.ScoreSuspectLists\'\n+                #end if\n+            #end if\n+\n+            --meta_select_col $meta_select_col\n+            --minMSMSpeaks $minMSMSpeaks\n+            --schema $schema\n+\n+            $PreProcessFilter.UnconnectedCompoundFilter\n+            $PreProcessFilter.IsotopeFilter\n+\n+            --FilterMinimumElements \'$PreProcessFilter.FilterMinimumElements\'\n+            --FilterMaximumElements \'$PreProcessFilter.FilterMaximumElements\'\n+            --FilterSmartsInclusionList \'$PreProcessFilter.FilterSmartsInclusionList\'\n+            --FilterSmartsExclusionList \'$PreProcessFilter.FilterSmartsExclusionList\'\n+            --FilterIncludedElements \'$PreProcessFilter.FilterIncludedElements\'\n+            --FilterExcludedElements \'$PreProcessFilter.FilterExcludedElements\'\n+            --FilterIncludedExclusiveElements \'$PreProcessFilter.FilterIncludedExclusiveElements\'\n+\n+            $skip_invalid_adducts\n+            --score_thrshld $PostProcessFilter.score_thrshld\n+            --pctexplpeak_thrshld $PostProcessFilter.pctexplpeak_thrshld\n+\n+    ]]></command>\n+    <inputs>\n+        <param name="input" type="data" format="msp" label="MSP file (Output from Create MSP tool)"/>\n+        <conditional name="db_select">\n+            <param argument="--MetFragDatabaseType" type="select" label="Choose Compound Database">\n+                <option value="PubChem" selected="true">PubChem</option>\n+                <option value="KEGG">KEGG</option>\n+                <option value="LocalCSV">Local database (csv)</option>\n+                <option value="MetChem">MetChem</option>\n+            </param>\n+            <when value="MetChem">\n+                <param argument="--LocalMetChemDatabaseServerIp" type="text" label="MetChem URL"/>\n+            </when>\n+            <when value="LocalCSV">\n+                <param 
argument="--LocalDatabasePath" type="data" format="csv"\n+                       label="Local database of compounds (CSV format)" />\n+            </when>\n+     '..b'(hydroxymethyl)oxane-2,3,4,5-tetrol                |  206       | InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H                        |...|\n++---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+\n+|...| 105.844569063138 | 696.0;1156.0;696.0;1156.0  | (3R,4S,5S,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol  |  5793      | InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1 |...|\n++---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+\n+|...| ...              | ...                        | ...                                                  | ...        | ...                                                                             
|...|\n++---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+\n+\n+Table continued (columns are derived from the MetFrag result):\n+\n+\n++---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+\n+|...| NoExplPeaks | NumberPeaksUsed | OfflineMetFusionScore | SMILES\t                                   | Score            | SuspectListScore | XlogP3 |\n++---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+\n+|...| 4           | 5\t            | 2.84566828424078\t    | C(C1C(C(C(C(O1)O)O)O)O)O                     | 1.82678219603441 | 1                | -2.6   |\n++---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+\n+|...| 4           | 5               | 2.84566828424078      | C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O   | 1.82678219603441 | 1                | -2.6   |\n++---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+\n+|...| ...         | ...             | ...                   | ...                                          | ...              | ...              | ...    |\n++---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+\n+\n+\n+Additional notes\n+--------------------\n+\n+The following adducts (and format) are currently supported in the MSP file. 
The neutral mass is automatically\n+ calculated for the precursor m/z by subtracting the adduct mass\n+\n+- \'[M+H]+\': 1.007276,\n+- \'[M+NH4]+\': 18.034374,\n+- \'[M+Na]+\': 22.989218,\n+- \'[M+K]+\': 38.963158,\n+- \'[M+CH3OH+H]+\': 33.033489,\n+- \'[M+ACN+H]+\': 42.033823,\n+- \'[M+ACN+Na]+\': 64.015765,\n+- \'[M+2ACN+H]+\': 83.06037,\n+- \'[M-H]-\': -1.007276,\n+- \'[M+Cl]-\': 34.969402,\n+- \'[M+HCOO]-\': 44.99819,\n+- \'[M-H+HCOOH]-\': 44.99819,\n+- \'[M+CH3COO]-\': 59.01385,\n+- \'[M-H+CH3COOH]-\': 59.01385\n+\n+Developers and contributors\n+---------------------------\n+- **Jordi Capellades (j.capellades.to@gmail.com) - Universitat Rovira i Virgili (SP)**\n+- **Julien Saint-Vanne (julien.saint-vanne@sb-roscoff.fr) - ABiMS (France)**\n+- **Tom Lawson (t.n.lawson@bham.ac.uk) - University of Birmingham (UK)**\n+- **Ralf Weber (r.j.weber@bham.ac.uk) - University of Birmingham (UK)**\n+- **Kristian Peters (kpeters@ipb-halle.de) - IPB Halle (Germany)**\n+- **Payam Emami (payam.emami@medsci.uu.se) - Uppsala Universitet (Sweden)**\n+- **Christoph Ruttkies (christoph.ruttkies@ipb-halle.de) - IPB Halle (Germany)**\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1186/s13321-016-0115-9</citation>\n+    </citations>\n+</tool> \n'
b
diff -r 000000000000 -r fd5c0b39569a test-data/A06_spec_trees_merged_pls.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/A06_spec_trees_merged_pls.msp Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,87 @@
+RECORD_TITLE: 12_15: FTMS + p NSI d Full ms2 353.04@hcd20.00 [50.00-365.00]
+AC$MASS_SPECTROMETRY: ION_MODE NA
+MS$FOCUSED_ION: PRECURSOR_M/Z  353.040466309
+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+
+CH$FORMULA: C8H24N3P2S4
+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd
+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 20.00
+PK$NUM_PEAK: 2
+PK$ANNOTATION: m/z tentative_formula formula_count adduct
+137.0456324 CH15NPS2 137.0456324
+196.0252314 C5H12N2PS2 196.0252314
+PK$PEAK: m/z int. rel.int.
+137.0456324 271.65788269 62.8779325411
+196.0252314 432.040100098 100.0
+
+RECORD_TITLE: 12_15: FTMS + p NSI d Full ms2 353.04@hcd40.00 [50.00-365.00]
+AC$MASS_SPECTROMETRY: ION_MODE NA
+MS$FOCUSED_ION: PRECURSOR_M/Z  353.040466309
+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+
+CH$FORMULA: C8H24N3P2S4
+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd
+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 40.00
+PK$NUM_PEAK: 12
+PK$ANNOTATION: m/z tentative_formula formula_count adduct
+86.0964254 C5H11N 86.0964254
+110.0712734 C5H7N3 110.0712734
+123.0264544 C7H6S,C3H9NPS 123.0264544
+137.0456324 CH15NPS2 137.0456324
+174.9928464 C3H12P2S2 174.9928464
+177.0084964 C3H14P2S2 177.0084964
+186.0411934 H17N3P2S2 186.0411934
+194.0002214 C5H9N2S3,CH12N3PS3 194.0002214
+194.0065214 C4H9N3P2S 194.0065214
+196.0252314 C5H12N2PS2 196.0252314
+196.0286034 C2H16N2PS3 196.0286034
+233.0394884 C6H18NS4 233.0394884
+PK$PEAK: m/z int. rel.int.
+86.0964254 371.63130188 5.91116935971
+110.0712734 591.911804199 9.41495213921
+123.0264544 532.023727417 8.46237208824
+137.0456324 320.305679321 5.09478374874
+174.9928464 6286.93375651 100.0
+177.0084964 1588.13269043 25.2608465738
+186.0411934 348.731552124 5.54692582474
+194.0002214 386.191268921 6.14276026881
+194.0065214 1241.16925049 19.7420443504
+196.0252314 243.325553894 3.87033748593
+196.0286034 720.052612305 11.4531604784
+233.0394884 530.847488403 8.44366282456
+
+RECORD_TITLE: 12_15: FTMS + p NSI d Full ms2 353.04@hcd80.00 [50.00-365.00]
+AC$MASS_SPECTROMETRY: ION_MODE NA
+MS$FOCUSED_ION: PRECURSOR_M/Z  353.040466309
+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+
+CH$FORMULA: C8H24N3P2S4
+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd
+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 80.00
+PK$NUM_PEAK: 13
+PK$ANNOTATION: m/z tentative_formula formula_count adduct
+76.0215474 C2H5NS 76.0215474
+109.0760244 C6H8N2 109.0760244
+110.0712734 C5H7N3 110.0712734
+137.0456324 CH15NPS2 137.0456324
+139.0326024 C2H9N3PS,C6H6N2S 139.0326024
+174.9928464 C3H12P2S2 174.9928464
+176.0006714 C3H13P2S2 176.0006714
+177.0084964 C3H14P2S2 177.0084964
+184.0031394 C3H9N3S3 184.0031394
+186.0411934 H17N3P2S2 186.0411934
+194.0002214 C5H9N2S3,CH12N3PS3 194.0002214
+194.0065214 C4H9N3P2S 194.0065214
+195.0078904 C5H10N2S3 195.0078904
+PK$PEAK: m/z int. rel.int.
+76.0215474 379.493642171 3.68804424582
+109.0760244 364.92791748 3.54648973433
+110.0712734 843.262105306 8.19509896767
+137.0456324 338.304992676 3.28775937967
+139.0326024 289.75042216 2.81589006619
+174.9928464 10289.8343099 100.0
+176.0006714 234.601882935 2.2799383923
+177.0084964 225.965415955 2.19600636074
+184.0031394 274.209452311 2.66485780094
+186.0411934 274.260437012 2.66535328706
+194.0002214 490.205210368 4.76397574154
+194.0065214 2064.41691081 20.0626836996
+195.0078904 319.455037435 3.10456930417
+
b
diff -r 000000000000 -r fd5c0b39569a test-data/A06_spec_trees_non_merged_pls.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/A06_spec_trees_non_merged_pls.msp Wed Feb 05 12:30:06 2020 -0500
[
b'@@ -0,0 +1,2693 @@\n+RECORD_TITLE: 1_2: FTMS + p NSI d Full ms2 137.05@hcd40.00 [50.00-150.00]<#>1_2: FTMS + p NSI d Full ms2 137.05@hcd80.00 [50.00-150.00]\n+AC$MASS_SPECTROMETRY: ION_MODE NA\n+MS$FOCUSED_ION: PRECURSOR_M/Z  137.045837402\n+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+\n+CH$FORMULA: C5H4N4O\n+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd\n+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 80.00, 40.00\n+PK$NUM_PEAK: 7\n+PK$PEAK: m/z int. rel.int.\n+55.0290744\t22578.4091797\t1.72798505995\n+67.0290744\t17079.2054036\t1.30711652616\n+92.0243234\t35477.718099\t2.71520311056\n+94.0399734\t458944.325358\t35.1242167353\n+110.0348884\t1306632.19857\t100.0\n+119.0352224\t894649.196291\t68.4698568788\n+120.0192384\t35990.6210938\t2.75445692623\n+\n+RECORD_TITLE: 1_1: FTMS + p NSI d Full ms2 137.05@hcd40.00 [50.00-150.00]<#>1_1: FTMS + p NSI d Full ms2 137.05@hcd80.00 [50.00-150.00]\n+AC$MASS_SPECTROMETRY: ION_MODE NA\n+MS$FOCUSED_ION: PRECURSOR_M/Z  137.045837402\n+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+\n+CH$FORMULA: CH15NPS2\n+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd\n+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 80.00, 40.00\n+PK$NUM_PEAK: 2\n+PK$PEAK: m/z int. rel.int.\n+110.0347334\t1306632.19857\t100.0\n+120.0190834\t35990.6210938\t2.75445692623\n+\n+RECORD_TITLE: 3_14: FTMS + p NSI d Full ms2 269.09@hcd40.00 [50.00-280.00]\n+AC$MASS_SPECTROMETRY: ION_MODE NA\n+MS$FOCUSED_ION: PRECURSOR_M/Z  269.088043213\n+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+\n+CH$FORMULA: CH146NO2PS\n+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd\n+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 40.00\n+PK$NUM_PEAK: 1\n+PK$PEAK: m/z int. 
rel.int.\n+235.0829854\t5067.24690755\t100.0\n+\n+RECORD_TITLE: 3_11: FTMS + p NSI d Full ms2 269.09@hcd20.00 [50.00-280.00]<#>3_11: FTMS + p NSI d Full ms2 269.09@hcd40.00 [50.00-280.00]<#>3_11: FTMS + p NSI d Full ms2 269.09@hcd80.00 [50.00-280.00]\n+AC$MASS_SPECTROMETRY: ION_MODE NA\n+MS$FOCUSED_ION: PRECURSOR_M/Z  269.088043213\n+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+\n+CH$FORMULA: C2H26N2O4P2S2\n+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd\n+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 20.00, 80.00, 40.00\n+PK$NUM_PEAK: 6\n+PK$PEAK: m/z int. rel.int.\n+137.0456324\t5325.18063693\t14.7467753513\n+138.0660334\t8148.97111005\t22.5665671265\n+156.0765984\t36110.8141277\t100.0\n+161.0603304\t1948.28767904\t5.39530255993\n+171.0636884\t3805.06982422\t10.5372030959\n+235.0827244\t5067.24690755\t14.0324914571\n+\n+RECORD_TITLE: 3_10: FTMS + p NSI d Full ms2 269.09@hcd40.00 [50.00-280.00]\n+AC$MASS_SPECTROMETRY: ION_MODE NA\n+MS$FOCUSED_ION: PRECURSOR_M/Z  269.088043213\n+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+\n+CH$FORMULA: C5H143O2S\n+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd\n+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 40.00\n+PK$NUM_PEAK: 1\n+PK$PEAK: m/z int. rel.int.\n+235.0826734\t5067.24690755\t100.0\n+\n+RECORD_TITLE: 3_13: FTMS + p NSI d Full ms2 269.09@hcd20.00 [50.00-280.00]<#>3_13: FTMS + p NSI d Full ms2 269.09@hcd40.00 [50.00-280.00]<#>3_13: FTMS + p NSI d Full ms2 269.09@hcd80.00 [50.00-280.00]\n+AC$MASS_SPECTROMETRY: ION_MODE NA\n+MS$FOCUSED_ION: PRECURSOR_M/Z  269.088043213\n+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+\n+CH$FORMULA: C6H15N5O5P\n+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd\n+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 20.00, 80.00, 40.00\n+PK$NUM_PEAK: 18\n+PK$PEAK: m/z int. 
rel.int.\n+70.0651254\t813.814819336\t0.414224977049\n+82.0525494\t2364.79215495\t1.20365954617\n+83.0603744\t1929.60078939\t0.982150759249\n+85.0760244\t944.649047852\t0.480818511618\n+86.0964254\t5916.5789388\t3.01148948987\n+93.0447244\t1327.61175537\t0.675743345822\n+95.0603744\t4523.99552409\t2.30267610962\n+105.0447244\t1210.09474691\t0.615928165544\n+110.0236554\t2863.59191895\t1.45754447906\n+110.0712734\t196466.863281\t100.0\n+121.0396394\t1337.42224121\t0.680736801551\n+137.0458394\t5325.18063693\t2.71047267106\n+138.0661884\t8148.97111005\t4.1477585451\n+150.0524324\t913.132537842\t0.464776870049\n+156.0769094\t36110.8141277\t18.3801041685\n+161.0600184\t1948.28767904\t0.991662230719\n+166.0614154\t959.693511963\t0.488476018772\n+235.0828794\t5067.24690755\t2.57918654725\n+\n+RECORD_TITLE: 3_12: FTMS + p NSI d '..b'\t530.847488403\t52.4668052225\n+233.0953724\t426.534858704\t42.1569694519\n+\n+RECORD_TITLE: 12_17: FTMS + p NSI d Full ms2 353.04@hcd20.00 [50.00-365.00]<#>12_17: FTMS + p NSI d Full ms2 353.04@hcd40.00 [50.00-365.00]\n+AC$MASS_SPECTROMETRY: ION_MODE NA\n+MS$FOCUSED_ION: PRECURSOR_M/Z  353.040466309\n+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+\n+CH$FORMULA: C14H136OP\n+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd\n+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 20.00, 40.00\n+PK$NUM_PEAK: 1\n+PK$PEAK: m/z int. rel.int.\n+200.0749394\t424.174110412\t100.0\n+\n+RECORD_TITLE: 12_14: FTMS + p NSI d Full ms2 353.04@hcd20.00 [50.00-365.00]<#>12_14: FTMS + p NSI d Full ms2 353.04@hcd40.00 [50.00-365.00]<#>12_14: FTMS + p NSI d Full ms2 353.04@hcd80.00 [50.00-365.00]\n+AC$MASS_SPECTROMETRY: ION_MODE NA\n+MS$FOCUSED_ION: PRECURSOR_M/Z  353.040466309\n+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+\n+CH$FORMULA: C5H15N5O9PS\n+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd\n+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 20.00, 80.00, 40.00\n+PK$NUM_PEAK: 42\n+PK$PEAK: m/z int. 
rel.int.\n+71.0127564\t207.915283203\t0.0949292619074\n+76.0215474\t379.493642171\t0.173267932953\n+86.0964254\t371.63130188\t0.169678172021\n+105.0004784\t396.980895996\t0.181252204589\n+110.0712734\t717.586954752\t0.327633442427\n+122.0270274\t4137.30795628\t1.88899817523\n+123.0266104\t532.023727417\t0.242909607138\n+123.0304494\t2251.09712728\t1.02779836808\n+124.0227074\t796.784576416\t0.363793226612\n+125.9977524\t231.322723389\t0.105616552355\n+130.0498704\t971.771247862\t0.443688053525\n+136.0760024\t235.365745544\t0.107462501835\n+137.0458394\t310.089518229\t0.141579631075\n+139.0327584\t289.75042216\t0.132293274882\n+140.0134024\t548.998168945\t0.250659740658\n+156.0770654\t1264.3899231\t0.577290905767\n+169.0582464\t596.368469238\t0.272287913315\n+179.0025214\t422.887283325\t0.193080455932\n+179.9947794\t230.910499573\t0.105428340589\n+185.0382384\t330.458770752\t0.150879756\n+195.0073334\t319.455037435\t0.145855708388\n+196.0247434\t337.682826996\t0.154178091344\n+197.0213284\t464.43107605\t0.212048381327\n+199.0716464\t412.669647217\t0.188415321944\n+200.0751374\t336.69682312\t0.153727905005\n+220.9978314\t219021.278608\t100.0\n+223.0131694\t272.505996704\t0.124419873008\n+224.0196584\t1011.77780151\t0.461954111463\n+224.9924344\t242.34072113\t0.110647112769\n+225.0162434\t2299.60510254\t1.04994597655\n+231.0437184\t1878.90701294\t0.85786505534\n+232.0403104\t240.709289551\t0.109902239216\n+233.0390864\t530.847488403\t0.242372563879\n+235.0134814\t866.191558837\t0.395482833605\n+239.0083964\t25619.9534506\t11.6974723248\n+247.0134814\t2169.64108785\t0.990607443093\n+249.0291314\t1576.70782471\t0.719887964644\n+259.0134814\t527.621429443\t0.240899620711\n+263.0083964\t671.352722168\t0.30652397175\n+277.0240464\t787.013214111\t0.359331850819\n+281.0189614\t258.13458252\t0.117858220973\n+307.0346114\t2006.81152344\t0.916263267291\n+\n+RECORD_TITLE: 12_15: FTMS + p NSI d Full ms2 353.04@hcd20.00 [50.00-365.00]<#>12_15: FTMS + p NSI d Full ms2 
353.04@hcd40.00 [50.00-365.00]<#>12_15: FTMS + p NSI d Full ms2 353.04@hcd80.00 [50.00-365.00]\n+AC$MASS_SPECTROMETRY: ION_MODE NA\n+MS$FOCUSED_ION: PRECURSOR_M/Z  353.040466309\n+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+\n+CH$FORMULA: C8H24N3P2S4\n+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE hcd\n+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 20.00, 80.00, 40.00\n+PK$NUM_PEAK: 18\n+PK$PEAK: m/z int. rel.int.\n+76.0215474\t379.493642171\t4.57862039996\n+86.0964254\t371.63130188\t4.48376065094\n+109.0760244\t364.92791748\t4.40288379518\n+110.0712734\t717.586954752\t8.65774259346\n+123.0264544\t532.023727417\t6.41890777847\n+137.0456324\t310.089518229\t3.74125422986\n+139.0326024\t289.75042216\t3.49586144898\n+174.9928464\t8288.38403321\t100.0\n+176.0006714\t234.601882935\t2.83049002067\n+177.0084964\t907.049053192\t10.9436175925\n+184.0031394\t274.209452311\t3.30835843528\n+186.0411934\t311.495994568\t3.75822347662\n+194.0002214\t438.198239645\t5.28689594846\n+194.0065214\t1652.79308065\t19.9410774649\n+195.0078904\t319.455037435\t3.85424994975\n+196.0252314\t337.682826996\t4.07416965289\n+196.0286034\t720.052612305\t8.6874909442\n+233.0394884\t530.847488403\t6.404716363\n+\n'
b
diff -r 000000000000 -r fd5c0b39569a test-data/RP022611.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/RP022611.tabular Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,2 @@
+adduct name sample_name ExplPeaks FormulasOfExplPeaks FragmenterScore FragmenterScore_Values Identifier InChI InChIKey InChIKey1 InChIKey2 InChIKey3 MaximumTreeDepth MolecularFormula MonoisotopicMass Name NoExplPeaks NumberPeaksUsed OfflineMetFusionScore SMILES Score
+[M-H]- D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]- 1_metfrag_result 59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0 59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H- 105.84456906313766 696.0;1156.0;696.0;1156.0 5793 InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1 WQZGKKKJIJFFOK-GASJEMHNSA-N WQZGKKKJIJFFOK GASJEMHNSA N 2 C6H12O6 180.0633881 D-Glucose 4 5 2.8456682842407846 C(C1C(C(C(C(O1)O)O)O)O)O 2.0
b
diff -r 000000000000 -r fd5c0b39569a test-data/RP022611.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/RP022611.txt Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,48 @@
+ACCESSION: RP022611
+RECORD_TITLE: D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-
+DATE: 2017.11.29
+AUTHORS: BGC, Helmholtz Zentrum Muenchen
+LICENSE: CC BY
+COPYRIGHT: Copyright (C) 2017
+COMMENT: CONFIDENCE standard compound
+COMMENT: INTERNAL_ID 226
+CH$NAME: D-Glucose
+CH$NAME: (3R,4S,5S,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol
+CH$COMPOUND_CLASS: N/A; Metabolomics Standard
+CH$FORMULA: C6H12O6
+CH$EXACT_MASS: 180.0634
+CH$SMILES: OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O
+CH$IUPAC: InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1
+CH$LINK: CAS 50-99-7
+CH$LINK: CHEBI 4167
+CH$LINK: KEGG C00031
+CH$LINK: PUBCHEM CID:5793
+CH$LINK: INCHIKEY WQZGKKKJIJFFOK-GASJEMHNSA-N
+CH$LINK: CHEMSPIDER 5589
+AC$INSTRUMENT: maXis plus UHR-ToF-MS, Bruker Daltonics
+AC$INSTRUMENT_TYPE: LC-ESI-QTOF
+AC$MASS_SPECTROMETRY: MS_TYPE MS2
+AC$MASS_SPECTROMETRY: ION_MODE NEGATIVE
+AC$MASS_SPECTROMETRY: IONIZATION ESI
+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE CID
+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 10
+AC$CHROMATOGRAPHY: COLUMN_NAME BEH C18 1.7um, 2.1x100mm, Waters
+AC$CHROMATOGRAPHY: FLOW_GRADIENT 95/5 at 0 min, 95/5 at 1.12 min, 0.5/99.5 at 6.41 min, 0.5/99.5 at 10.01 min
+AC$CHROMATOGRAPHY: FLOW_RATE 400 uL/min
+AC$CHROMATOGRAPHY: RETENTION_TIME 0.604 min
+AC$CHROMATOGRAPHY: SOLVENT A Water with 0.1% formic acid
+AC$CHROMATOGRAPHY: SOLVENT B ACN with 0.1% formic acid
+MS$FOCUSED_ION: BASE_PEAK 179.0572
+MS$FOCUSED_ION: PRECURSOR_M/Z 179.0561
+MS$FOCUSED_ION: PRECURSOR_TYPE [M-H]-
+MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included
+MS$DATA_PROCESSING: WHOLE RMassBank 2.4.0
+PK$SPLASH: splash10-059i-9000000000-fd62712fc14434a3aa53
+PK$NUM_PEAK: 5
+PK$PEAK: m/z int. rel.int.
+  59.0138 278 715
+  71.014 264 679
+  72.9928 30 77
+  89.0251 388 999
+  101.0234 40 102
+//
\ No newline at end of file
b
diff -r 000000000000 -r fd5c0b39569a test-data/RP022611_all_col.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/RP022611_all_col.tabular Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,2 @@
+polarity adduct massbank name cols num_peaks sample_name precursor_type precursor_mz ExplPeaks FormulasOfExplPeaks FragmenterScore FragmenterScore_Values Identifier InChI InChIKey InChIKey1 InChIKey2 InChIKey3 MaximumTreeDepth MolecularFormula MonoisotopicMass Name NoExplPeaks NumberPeaksUsed OfflineMetFusionScore SMILES Score
+NEGATIVE [M-H]- D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]- D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]- m/z int. rel.int. 5 1_metfrag_result [M-H]- 179.0561 59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0 59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H- 105.84456906313766 696.0;1156.0;696.0;1156.0 5793 InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1 WQZGKKKJIJFFOK-GASJEMHNSA-N WQZGKKKJIJFFOK GASJEMHNSA N 2 C6H12O6 180.0633881 D-Glucose 4 5 2.8456682842407846 C(C1C(C(C(C(O1)O)O)O)O)O 2.0
b
diff -r 000000000000 -r fd5c0b39569a test-data/RP022611_suspect.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/RP022611_suspect.txt Wed Feb 05 12:30:06 2020 -0500
[
b'@@ -0,0 +1,2396 @@\n+name\tsample_name\tExplPeaks\tFormulasOfExplPeaks\tFragmenterScore\tFragmenterScore_Values\tIUPACName\tIdentifier\tInChI\tInChIKey\tInChIKey1\tInChIKey2\tMaximumTreeDepth\tMolecularFormula\tMonoisotopicMass\tNoExplPeaks\tNumberPeaksUsed\tOfflineMetFusionScore\tSMILES\tScore\tSuspectListScore\tXlogP3\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\tNA\t206\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2\tWQZGKKKJIJFFOK-UHFFFAOYSA-N\tWQZGKKKJIJFFOK\tUHFFFAOYSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8459377150844047\tC(C1C(C(C(C(O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\tNA\t5793\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1\tWQZGKKKJIJFFOK-GASJEMHNSA-N\tWQZGKKKJIJFFOK\tGASJEMHNSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8459377150844047\tC([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\tNA\t6036\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3+,4+,5-,6?/m1/s1\tWQZGKKKJIJFFOK-SVZMEOIVSA-N\tWQZGKKKJIJFFOK\tSVZMEOIVSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8459377150844047\tC([C@@H]1[C@@H]([C@@H]([C@H](C(O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; 
[M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\tNA\t18950\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5+,6?/m1/s1\tWQZGKKKJIJFFOK-QTVWNMPRSA-N\tWQZGKKKJIJFFOK\tQTVWNMPRSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8459377150844047\tC([C@@H]1[C@H]([C@@H]([C@@H](C(O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\tNA\t64689\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6-/m1/s1\tWQZGKKKJIJFFOK-VFUOTHLCSA-N\tWQZGKKKJIJFFOK\tVFUOTHLCSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8459377150844047\tC([C@@H]1[C@H]([C@@H]([C@H]([C@@H](O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\tNA\t79025\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6+/m1/s1\tWQZGKKKJIJFFOK-DVKNGEFBSA-N\tWQZGKKKJIJFFOK\tDVKNGEFBSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8459377150844047\tC([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; 
[M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\tNA\t81696\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3+,4+,5+,6+/m1/s1\tWQZGKKKJIJFFOK-URLGYRAOSA-N\tWQZGKKKJIJFFOK\tURLGYRAOSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8459377150844047\tC([C@@H]1[C@@H]([C@@H]([C@@H]([C@H](O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O'..b'257\t0\t5\t0.4270730313919928\tCCCCC(SC)S(=O)C\t0.09003844935784057\t0.0\t1.9\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t69029915\tInChI=1S/C5H7F3N4/c1-3(2)4-9-11-12(10-4)5(6,7)8/h3H,1-2H3\tVGYYIKKODBUWRB-UHFFFAOYSA-N\tVGYYIKKODBUWRB\tUHFFFAOYSA\t2\tC5H7F3N4\t180.062281\t0\t5\t0.42397734883961286\tCC(C)C1=NN(N=N1)C(F)(F)F\t0.0893857964478408\t0.0\t2.4\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t69031683\tInChI=1S/C5H7F3N4/c1-3(2)12-10-4(9-11-12)5(6,7)8/h3H,1-2H3\tQNPVRRPJDIATQR-UHFFFAOYSA-N\tQNPVRRPJDIATQR\tUHFFFAOYSA\t2\tC5H7F3N4\t180.062281\t0\t5\t0.4221396111583605\tCC(C)N1N=C(N=N1)C(F)(F)F\t0.08899835205546809\t0.0\t1.7\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t69385833\tInChI=1S/C5H7F3N4/c1-3(2)4(5(6,7)8)9-11-12-10-4/h3H,1-2H3\tBPNZYTIHJYBRRX-UHFFFAOYSA-N\tBPNZYTIHJYBRRX\tUHFFFAOYSA\t2\tC5H7F3N4\t180.062281\t0\t5\t0.42007842893828634\tCC(C)C1(N=NN=N1)C(F)(F)F\t0.08856379956140276\t0.0\t3.1\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; 
[M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t101988465\tInChI=1S/C7H16OS2/c1-6(2)5-7(9-3)10(4)8/h6-7H,5H2,1-4H3\tVBTATWNYJIGLOE-UHFFFAOYSA-N\tVBTATWNYJIGLOE\tUHFFFAOYSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.4170063510567664\tCC(C)CC(SC)S(=O)C\t0.0879161231491109\t0.0\t1.8\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t136806474\tInChI=1S/C3H4N10/c4-5-1(2-6-10-11-7-2)3-8-12-13-9-3/h4H2,(H,6,7,10,11)(H,8,9,12,13)\tGZPPCEFYNGHFND-UHFFFAOYSA-N\tGZPPCEFYNGHFND\tUHFFFAOYSA\t2\tC3H4N10\t180.06204\t0\t5\t0.4109244697724204\tC1(=NNN=N1)C(=NN)C2=NNN=N2\t0.08663389945487261\t0.0\t-1.0\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t10487635\tInChI=1S/C7H16OS2/c1-7(2,3)6(9-4)10(5)8/h6H,1-5H3/t6-,10+/m1/s1\tDGBCVJJGYTZLQT-LDWIPMOCSA-N\tDGBCVJJGYTZLQT\tLDWIPMOCSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.401481799234462\tCC(C)(C)[C@H](SC)[S@@](=O)C\t0.08464313124770298\t0.0\t1.8\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t10678952\tInChI=1S/C7H16OS2/c1-7(2,3)6(9-4)10(5)8/h6H,1-5H3/t6-,10-/m0/s1\tDGBCVJJGYTZLQT-WKEGUHRASA-N\tDGBCVJJGYTZLQT\tWKEGUHRASA\t2\tC7H16OS2\t180.064257\t0\t5\t0.401481799234462\tCC(C)(C)[C@@H](SC)[S@@](=O)C\t0.08464313124770298\t0.0\t1.8\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t85157161\tInChI=1S/C7H16OS2/c1-7(2,3)6(9-4)10(5)8/h6H,1-5H3\tDGBCVJJGYTZLQT-UHFFFAOYSA-N\tDGBCVJJGYTZLQT\tUHFFFAOYSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.401481799234462\tCC(C)(C)C(SC)S(=O)C\t0.08464313124770298\t0.0\t1.8\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t101042638\tInChI=1S/C7H16OS2/c1-7(2,3)6(9-4)10(5)8/h6H,1-5H3/t6-,10?/m1/s1\tDGBCVJJGYTZLQT-ZMMDDIOLSA-N\tDGBCVJJGYTZLQT\tZMMDDIOLSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.401481799234462\tCC(C)(C)[C@H](SC)S(=O)C\t0.08464313124770298\t0.0\t1.8\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; 
[M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t101042640\tInChI=1S/C7H16OS2/c1-7(2,3)6(9-4)10(5)8/h6H,1-5H3/t6-,10?/m0/s1\tDGBCVJJGYTZLQT-UOQJWNSWSA-N\tDGBCVJJGYTZLQT\tUOQJWNSWSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.401481799234462\tCC(C)(C)[C@@H](SC)S(=O)C\t0.08464313124770298\t0.0\t1.8\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t134896507\tInChI=1S/C7H16OS2/c1-6(2)9-10(8)7(3,4)5/h6H,1-5H3\tGDGZWEQOPWCQRW-UHFFFAOYSA-N\tGDGZWEQOPWCQRW\tUHFFFAOYSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.398653994787015\tCC(C)SS(=O)C(C)(C)C\t0.08404695422686545\t0.0\t1.6\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t134996043\tInChI=1S/C7H16OS2/c1-6(2)10(8)9-7(3,4)5/h6H,1-5H3\tCJUKTEFJUSCQOK-UHFFFAOYSA-N\tCJUKTEFJUSCQOK\tUHFFFAOYSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.398653994787015\tCC(C)S(=O)SC(C)(C)C\t0.08404695422686545\t0.0\t1.6\r\n+D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tNA\t54519899\tInChI=1S/C7H15BCl2/c1-5(2)8(6(3)4)7(9)10/h5-7H,1-4H3\tYPAVVYHXXLUBOP-UHFFFAOYSA-N\tYPAVVYHXXLUBOP\tUHFFFAOYSA\t2\tC7H15BCl2\t180.064386\t0\t5\t0.394762015700907\tB(C(C)C)(C(C)C)C(Cl)Cl\t0.08322642065043208\t0.0\tNA\r\n'
b
diff -r 000000000000 -r fd5c0b39569a test-data/RP022611_suspect_default.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/RP022611_suspect_default.txt Wed Feb 05 12:30:06 2020 -0500
[
b'@@ -0,0 +1,2563 @@\n+adduct\tname\tsample_name\tExplPeaks\tFormulasOfExplPeaks\tFragmenterScore\tFragmenterScore_Values\tIUPACName\tIdentifier\tInChI\tInChIKey\tInChIKey1\tInChIKey2\tMaximumTreeDepth\tMolecularFormula\tMonoisotopicMass\tNoExplPeaks\tNumberPeaksUsed\tOfflineMetFusionScore\tSMILES\tScore\tSuspectListScore\tXlogP3\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\t6-(hydroxymethyl)oxane-2,3,4,5-tetrol\t206\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2\tWQZGKKKJIJFFOK-UHFFFAOYSA-N\tWQZGKKKJIJFFOK\tUHFFFAOYSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8456682842407846\tC(C1C(C(C(C(O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\t(3R,4S,5S,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol\t5793\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1\tWQZGKKKJIJFFOK-GASJEMHNSA-N\tWQZGKKKJIJFFOK\tGASJEMHNSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8456682842407846\tC([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; 
[M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\t(3R,4S,5R,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol\t6036\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3+,4+,5-,6?/m1/s1\tWQZGKKKJIJFFOK-SVZMEOIVSA-N\tWQZGKKKJIJFFOK\tSVZMEOIVSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8456682842407846\tC([C@@H]1[C@@H]([C@@H]([C@H](C(O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\t(3S,4S,5S,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol\t18950\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5+,6?/m1/s1\tWQZGKKKJIJFFOK-QTVWNMPRSA-N\tWQZGKKKJIJFFOK\tQTVWNMPRSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8456682842407846\tC([C@@H]1[C@H]([C@@H]([C@@H](C(O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\t(2R,3R,4S,5S,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol\t64689\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6-/m1/s1\tWQZGKKKJIJFFOK-VFUOTHLCSA-N\tWQZGKKKJIJFFOK\tVFUOTHLCSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8456682842407846\tC([C@@H]1[C@H]([C@@H]([C@H]([C@@H](O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; 
[M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\t(2S,3R,4S,5S,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol\t79025\tInChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6+/m1/s1\tWQZGKKKJIJFFOK-DVKNGEFBSA-N\tWQZGKKKJIJFFOK\tDVKNGEFBSA\t2\tC6H12O6\t180.063388\t4\t5\t2.8456682842407846\tC([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O\t1.8267821960344066\t1.0\t-2.6\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\t59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0\t59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H-\t105.84456906313766\t696.0;1156.0;696.0;1156.0\t(2S,3S,4S,5R,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol\t81696\tInChI=1S/C6H12O6/'..b'1\tCC(C)N1N=C(N=N1)C(F)(F)F\t0.08900632336289885\t0.0\t1.7\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\t5-propan-2-yl-5-(trifluoromethyl)tetrazole\t69385833\tInChI=1S/C5H7F3N4/c1-3(2)4(5(6,7)8)9-11-12-10-4/h3H,1-2H3\tBPNZYTIHJYBRRX-UHFFFAOYSA-N\tBPNZYTIHJYBRRX\tUHFFFAOYSA\t2\tC5H7F3N4\t180.062281\t0\t5\t0.42007630805387386\tCC(C)C1(N=NN=N1)C(F)(F)F\t0.08857173769274002\t0.0\t3.1\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\t3-methyl-1-methylsulfanyl-1-methylsulfinylbutane\t101988465\tInChI=1S/C7H16OS2/c1-6(2)5-7(9-3)10(4)8/h6-7H,5H2,1-4H3\tVBTATWNYJIGLOE-UHFFFAOYSA-N\tVBTATWNYJIGLOE\tUHFFFAOYSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.4170042929070892\tCC(C)CC(SC)S(=O)C\t0.08792401318518639\t0.0\t1.8\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; 
[M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tbis(2H-tetrazol-5-yl)methylidenehydrazine\t136806474\tInChI=1S/C3H4N10/c4-5-1(2-6-10-11-7-2)3-8-12-13-9-3/h4H2,(H,6,7,10,11)(H,8,9,12,13)\tGZPPCEFYNGHFND-UHFFFAOYSA-N\tGZPPCEFYNGHFND\tUHFFFAOYSA\t2\tC3H4N10\t180.06204\t0\t5\t0.4104853063643431\tC1(=NNN=N1)C(=NN)C2=NNN=N2\t0.08654950585159843\t0.0\t-1.0\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\t(1R)-2,2-dimethyl-1-methylsulfanyl-1-[(S)-methylsulfinyl]propane\t10487635\tInChI=1S/C7H16OS2/c1-7(2,3)6(9-4)10(5)8/h6H,1-5H3/t6-,10+/m1/s1\tDGBCVJJGYTZLQT-LDWIPMOCSA-N\tDGBCVJJGYTZLQT\tLDWIPMOCSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.4014805265708314\tCC(C)(C)[C@H](SC)[S@@](=O)C\t0.08465087701069385\t0.0\t1.8\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\t(1S)-2,2-dimethyl-1-methylsulfanyl-1-[(S)-methylsulfinyl]propane\t10678952\tInChI=1S/C7H16OS2/c1-7(2,3)6(9-4)10(5)8/h6H,1-5H3/t6-,10-/m0/s1\tDGBCVJJGYTZLQT-WKEGUHRASA-N\tDGBCVJJGYTZLQT\tWKEGUHRASA\t2\tC7H16OS2\t180.064257\t0\t5\t0.4014805265708314\tCC(C)(C)[C@@H](SC)[S@@](=O)C\t0.08465087701069385\t0.0\t1.8\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\t2,2-dimethyl-1-methylsulfanyl-1-methylsulfinylpropane\t85157161\tInChI=1S/C7H16OS2/c1-7(2,3)6(9-4)10(5)8/h6H,1-5H3\tDGBCVJJGYTZLQT-UHFFFAOYSA-N\tDGBCVJJGYTZLQT\tUHFFFAOYSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.4014805265708314\tCC(C)(C)C(SC)S(=O)C\t0.08465087701069385\t0.0\t1.8\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\t(1R)-2,2-dimethyl-1-methylsulfanyl-1-methylsulfinylpropane\t101042638\tInChI=1S/C7H16OS2/c1-7(2,3)6(9-4)10(5)8/h6H,1-5H3/t6-,10?/m1/s1\tDGBCVJJGYTZLQT-ZMMDDIOLSA-N\tDGBCVJJGYTZLQT\tZMMDDIOLSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.4014805265708314\tCC(C)(C)[C@H](SC)S(=O)C\t0.08465087701069385\t0.0\t1.8\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; 
[M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\t(1S)-2,2-dimethyl-1-methylsulfanyl-1-methylsulfinylpropane\t101042640\tInChI=1S/C7H16OS2/c1-7(2,3)6(9-4)10(5)8/h6H,1-5H3/t6-,10?/m0/s1\tDGBCVJJGYTZLQT-UOQJWNSWSA-N\tDGBCVJJGYTZLQT\tUOQJWNSWSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.4014805265708314\tCC(C)(C)[C@@H](SC)S(=O)C\t0.08465087701069385\t0.0\t1.8\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\t2-methyl-2-propan-2-ylsulfanylsulfinylpropane\t134896507\tInChI=1S/C7H16OS2/c1-6(2)9-10(8)7(3,4)5/h6H,1-5H3\tGDGZWEQOPWCQRW-UHFFFAOYSA-N\tGDGZWEQOPWCQRW\tUHFFFAOYSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.39865303975087146\tCC(C)SS(=O)C(C)(C)C\t0.08405471051392714\t0.0\t1.6\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\t2-methyl-2-propan-2-ylsulfinylsulfanylpropane\t134996043\tInChI=1S/C7H16OS2/c1-6(2)10(8)9-7(3,4)5/h6H,1-5H3\tCJUKTEFJUSCQOK-UHFFFAOYSA-N\tCJUKTEFJUSCQOK\tUHFFFAOYSA\t2\tC7H16OS2\t180.064257\t0\t5\t0.39865303975087146\tCC(C)S(=O)SC(C)(C)C\t0.08405471051392714\t0.0\t1.6\r\n+[M-H]-\tD-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=; [M-H]-\t1_metfrag_result\tNA\tNA\t0.0\tNA\tdichloromethyl-di(propan-2-yl)borane\t54519899\tInChI=1S/C7H15BCl2/c1-5(2)8(6(3)4)7(9)10/h5-7H,1-4H3\tYPAVVYHXXLUBOP-UHFFFAOYSA-N\tYPAVVYHXXLUBOP\tUHFFFAOYSA\t2\tC7H15BCl2\t180.064386\t0\t5\t0.39476099327258585\tB(C(C)C)(C(C)C)C(Cl)Cl\t0.08323408503909446\t0.0\tNA\r\n'
b
diff -r 000000000000 -r fd5c0b39569a test-data/demo_db.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_db.csv Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,14 @@
+Identifier,MonoisotopicMass,MolecularFormula,SMILES,InChI,InChIKey1,InChIKey2,InChIKey3,Name,InChIKey
+HMDB0000123,75.03202841,C2H5NO2,NCC(O)=O,"InChI=1S/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5)",DHMQDGOQFOQNFH,UHFFFAOYSA,N,Glycine,DHMQDGOQFOQNFH-UHFFFAOYSA-N
+HMDB0002151,78.0139355,C2H6OS,CS(C)=O,InChI=1S/C2H6OS/c1-4(2)3/h1-2H3,IAZDPXIOMUYVGZ,UHFFFAOYSA,N,Dimethyl sulfoxide,IAZDPXIOMUYVGZ-UHFFFAOYSA-N
+HMDB0031239,75.03202841,C2H5NO2,CCON=O,"InChI=1S/C2H5NO2/c1-2-5-3-4/h2H2,1H3",QQZWEECEMNQSTG,UHFFFAOYSA,N,Ethyl nitrite,QQZWEECEMNQSTG-UHFFFAOYSA-N
+HMDB0014691,75.03202841,C2H5NO2,CC(=O)NO,"InChI=1S/C2H5NO2/c1-2(4)3-5/h5H,1H3,(H,3,4)",RRUDCFGSUDOHDG,UHFFFAOYSA,N,Acetohydroxamic Acid,RRUDCFGSUDOHDG-UHFFFAOYSA-N
+HMDB0002039,85.05276385,C4H7NO,O=C1CCCN1,"InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6)",HNJBEVLQSNELDL,UHFFFAOYSA,N,2-Pyrrolidinone,HNJBEVLQSNELDL-UHFFFAOYSA-N
+HMDB0060427,85.05276385,C4H7NO,CC(C)(O)C#N,"InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3",MWFMGBPGAXYFAR,UHFFFAOYSA,N,Acetone cyanohydrin,MWFMGBPGAXYFAR-UHFFFAOYSA-N
+679,78.014,C2H6OS,CS(=O)C,InChI=1S/C2H6OS/c1-4(2)3/h1-2H3,IAZDPXIOMUYVGZ,UHFFFAOYSA,N,test,IAZDPXIOMUYVGZ-UHFFFAOYSA-N
+71360412,79.857,CHBN4,[B]C1=NNN=N1,"InChI=1S/CHBN4/c2-1-3-5-6-4-1/h(H,3,4,5,6)",AHLHXBWPSRCALN,UHFFFAOYSA,N,"1H-Tetrazole, 5-boryl-",AHLHXBWPSRCALN-UHFFFAOYSA-N
+71360412,79.857,CHBN4,[B]C1=NNN=N1,"InChI=1S/CHBN4/c2-1-3-5-6-4-1/h(H,3,4,5,6)",AHLHXBWPSRCALN,UHFFFAOYSA,N,"1H-Tetrazole, 5-boryl-",AHLHXBWPSRCALN-UHFFFAOYSA-N
+22340199,87.068,C4H9NO,CCCCN=O,"InChI=1S/C4H9NO/c1-2-3-4-5-6/h2-4H2,1H3",,,,nitrosobutane,VVGONFMJWMFEMZ-UHFFFAOYSA-N
+11629016,696.014,C29H21ClF3IN2O5,CC1=C(C2=C(N1CC3=CC=C(C=C3)C[C@@]4(C(=O)NC(=O)O4)C)C=C(C=C2)OC(F)(F)F)C(=O)C5=C(C=C(C=C5)Cl)I,"InChI=1S/C29H21ClF3IN2O5/c1-15-24(25(37)20-9-7-18(30)11-22(20)34)21-10-8-19(40-29(31,32)33)12-23(21)36(15)14-17-5-3-16(4-6-17)13-28(2)26(38)35-27(39)41-28/h3-12H,13-14H2,1-2H3,(H,35,38,39)/t28-/m1/s1",UGAKGYFHYQHBSZ,MUUNZHRXSA,N,Example 1,UGAKGYFHYQHBSZ-MUUNZHRXSA-N
+11629016_duplicate,696.014,C29H21ClF3IN2O5,CC1=C(C2=C(N1CC3=CC=C(C=C3)C[C@@]4(C(=O)NC(=O)O4)C)C=C(C=C2)OC(F)(F)F)C(=O)C5=C(C=C(C=C5)Cl)I,"InChI=1S/C29H21ClF3IN2O5/c1-15-24(25(37)20-9-7-18(30)11-22(20)34)21-10-8-19(40-29(31,32)33)12-23(21)36(15)14-17-5-3-16(4-6-17)13-28(2)26(38)35-27(39)41-28/h3-12H,13-14H2,1-2H3,(H,35,38,39)/t28-/m1/s1",UGAKGYFHYQHBSZ,MUUNZHRXSA,N,Example_duplicate,UGAKGYFHYQHBSZ-MUUNZHRXSA-N
+5793,180.0633881,C6H12O6,C(C1C(C(C(C(O1)O)O)O)O)O,"InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1",WQZGKKKJIJFFOK,GASJEMHNSA,N,D-Glucose,WQZGKKKJIJFFOK-GASJEMHNSA-N
b
diff -r 000000000000 -r fd5c0b39569a test-data/dup_check.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dup_check.txt Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,149 @@
+ACCESSION: FU000001
+RECORD_TITLE: 1111111111111111
+DATE: 2016.01.19 (Created 2009.02.18, modified 2011.05.06)
+AUTHORS: Matsuura F, Ohta M, Kittaka M, Faculty of Life Science and Biotechnology, Fukuyama University
+LICENSE: CC BY-SA
+CH$NAME: 3-Man2GlcNAc
+CH$NAME: Man-alpha-1-3Man-beta-1-4GlcNac
+CH$COMPOUND_CLASS: Natural Product; Oligosaccharide; N-linked glycan; High-mannose type
+CH$FORMULA: C20H35NO16
+CH$EXACT_MASS: 545.19558
+CH$SMILES: CC(=O)NC(C(O)1)C(O)C(OC(O2)C(O)C(OC(O3)C(O)C(O)C(O)C(CO)3)C(O)C(CO)2)C(CO)O1
+CH$IUPAC: InChI=1/C20H35NO16/c1-5(25)21-9-12(28)16(8(4-24)33-18(9)32)36-20-15(31)17(11(27)7(3-23)35-20)37-19-14(30)13(29)10(26)6(2-22)34-19/h6-20,22-24,26-32H,2-4H2,1H3,(H,21,25)/t6-,7-,8-,9-,10-,11-,12-,13+,14-,15+,16-,17+,18+,19-,20+/m1/s1/f/h21H
+CH$LINK: CHEMSPIDER 24606097
+CH$LINK: KEGG G00319
+AC$INSTRUMENT: 2695 HPLC Quadro Micro API, Waters
+AC$INSTRUMENT_TYPE: LC-ESI-QQ
+AC$MASS_SPECTROMETRY: MS_TYPE MS2
+AC$MASS_SPECTROMETRY: ION_MODE POSITIVE
+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 15.0 V
+AC$MASS_SPECTROMETRY: DATAFORMAT Centroid
+AC$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW 897 L/Hr
+AC$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE 399 C
+AC$MASS_SPECTROMETRY: FRAGMENTATION_METHOD LOW-ENERGY CID
+AC$MASS_SPECTROMETRY: IONIZATION ESI
+AC$MASS_SPECTROMETRY: SCANNING 1 amu/sec (m/z = 20-2040)
+AC$MASS_SPECTROMETRY: SOURCE_TEMPERATURE 100C
+AC$CHROMATOGRAPHY: COLUMN_NAME TSK-GEL Amide-80 2.0 mm X 250 mm (TOSOH)
+AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE 40 C
+AC$CHROMATOGRAPHY: FLOW_GRADIENT 74/26 at 0 min, 50/50 at 60 min.
+AC$CHROMATOGRAPHY: FLOW_RATE 0.2 ml/min
+AC$CHROMATOGRAPHY: RETENTION_TIME 7.080 min
+AC$CHROMATOGRAPHY: SAMPLING_CONE 43.10 V
+AC$CHROMATOGRAPHY: SOLVENT CH3CN/H2O
+MS$FOCUSED_ION: DERIVATIVE_FORM C29H46N2O17
+MS$FOCUSED_ION: DERIVATIVE_MASS 694.27965
+MS$FOCUSED_ION: DERIVATIVE_TYPE ABEE (p-Aminobenzoic acid ethyl ester)
+MS$FOCUSED_ION: PRECURSOR_M/Z 695.00
+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+
+PK$SPLASH: splash10-0006-0002029000-bfe86aa7211a87812a06
+PK$NUM_PEAK: 8
+PK$PEAK: m/z int. rel.int.
+  370.8 3.277e5 366
+  371.4 3.036e4 34
+  532.0 5.812e4 65
+  532.6 2.982e5 333
+  533.3 5.196e4 58
+  694.1 4.564e5 510
+  694.8 8.939e5 999
+  695.4 5.537e4 62
+
+ACCESSION: FU000001
+RECORD_TITLE: 222222222222222222
+DATE: 2016.01.19 (Created 2009.02.18, modified 2011.05.06)
+AUTHORS: Matsuura F, Ohta M, Kittaka M, Faculty of Life Science and Biotechnology, Fukuyama University
+LICENSE: CC BY-SA
+CH$NAME: 3-Man2GlcNAc
+CH$NAME: Man-alpha-1-3Man-beta-1-4GlcNac
+CH$COMPOUND_CLASS: Natural Product; Oligosaccharide; N-linked glycan; High-mannose type
+CH$FORMULA: C20H35NO16
+CH$EXACT_MASS: 545.19558
+CH$SMILES: CC(=O)NC(C(O)1)C(O)C(OC(O2)C(O)C(OC(O3)C(O)C(O)C(O)C(CO)3)C(O)C(CO)2)C(CO)O1
+CH$IUPAC: InChI=1/C20H35NO16/c1-5(25)21-9-12(28)16(8(4-24)33-18(9)32)36-20-15(31)17(11(27)7(3-23)35-20)37-19-14(30)13(29)10(26)6(2-22)34-19/h6-20,22-24,26-32H,2-4H2,1H3,(H,21,25)/t6-,7-,8-,9-,10-,11-,12-,13+,14-,15+,16-,17+,18+,19-,20+/m1/s1/f/h21H
+CH$LINK: CHEMSPIDER 24606097
+CH$LINK: KEGG G00319
+AC$INSTRUMENT: 2695 HPLC Quadro Micro API, Waters
+AC$INSTRUMENT_TYPE: LC-ESI-QQ
+AC$MASS_SPECTROMETRY: MS_TYPE MS2
+AC$MASS_SPECTROMETRY: ION_MODE POSITIVE
+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 15.0 V
+AC$MASS_SPECTROMETRY: DATAFORMAT Centroid
+AC$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW 897 L/Hr
+AC$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE 399 C
+AC$MASS_SPECTROMETRY: FRAGMENTATION_METHOD LOW-ENERGY CID
+AC$MASS_SPECTROMETRY: IONIZATION ESI
+AC$MASS_SPECTROMETRY: SCANNING 1 amu/sec (m/z = 20-2040)
+AC$MASS_SPECTROMETRY: SOURCE_TEMPERATURE 100C
+AC$CHROMATOGRAPHY: COLUMN_NAME TSK-GEL Amide-80 2.0 mm X 250 mm (TOSOH)
+AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE 40 C
+AC$CHROMATOGRAPHY: FLOW_GRADIENT 74/26 at 0 min, 50/50 at 60 min.
+AC$CHROMATOGRAPHY: FLOW_RATE 0.2 ml/min
+AC$CHROMATOGRAPHY: RETENTION_TIME 7.080 min
+AC$CHROMATOGRAPHY: SAMPLING_CONE 43.10 V
+AC$CHROMATOGRAPHY: SOLVENT CH3CN/H2O
+MS$FOCUSED_ION: DERIVATIVE_FORM C29H46N2O17
+MS$FOCUSED_ION: DERIVATIVE_MASS 694.27965
+MS$FOCUSED_ION: DERIVATIVE_TYPE ABEE (p-Aminobenzoic acid ethyl ester)
+MS$FOCUSED_ION: PRECURSOR_M/Z 695.00
+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+
+PK$SPLASH: splash10-0006-0002029000-bfe86aa7211a87812a06
+PK$NUM_PEAK: 8
+PK$PEAK: m/z int. rel.int.
+  370.8 3.277e5 366
+  371.4 3.036e4 34
+  532.0 5.812e4 65
+  532.6 2.982e5 333
+  533.3 5.196e4 58
+  694.1 4.564e5 510
+  694.8 8.939e5 999
+  695.4 5.537e4 62
+
+ACCESSION: FU000001
+RECORD_TITLE: 33333333333333333
+DATE: 2016.01.19 (Created 2009.02.18, modified 2011.05.06)
+AUTHORS: Matsuura F, Ohta M, Kittaka M, Faculty of Life Science and Biotechnology, Fukuyama University
+LICENSE: CC BY-SA
+CH$NAME: 3-Man2GlcNAc
+CH$NAME: Man-alpha-1-3Man-beta-1-4GlcNac
+CH$COMPOUND_CLASS: Natural Product; Oligosaccharide; N-linked glycan; High-mannose type
+CH$FORMULA: C20H35NO16
+CH$EXACT_MASS: 545.19558
+CH$SMILES: CC(=O)NC(C(O)1)C(O)C(OC(O2)C(O)C(OC(O3)C(O)C(O)C(O)C(CO)3)C(O)C(CO)2)C(CO)O1
+CH$IUPAC: InChI=1/C20H35NO16/c1-5(25)21-9-12(28)16(8(4-24)33-18(9)32)36-20-15(31)17(11(27)7(3-23)35-20)37-19-14(30)13(29)10(26)6(2-22)34-19/h6-20,22-24,26-32H,2-4H2,1H3,(H,21,25)/t6-,7-,8-,9-,10-,11-,12-,13+,14-,15+,16-,17+,18+,19-,20+/m1/s1/f/h21H
+CH$LINK: CHEMSPIDER 24606097
+CH$LINK: KEGG G00319
+AC$INSTRUMENT: 2695 HPLC Quadro Micro API, Waters
+AC$INSTRUMENT_TYPE: LC-ESI-QQ
+AC$MASS_SPECTROMETRY: MS_TYPE MS2
+AC$MASS_SPECTROMETRY: ION_MODE POSITIVE
+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 15.0 V
+AC$MASS_SPECTROMETRY: DATAFORMAT Centroid
+AC$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW 897 L/Hr
+AC$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE 399 C
+AC$MASS_SPECTROMETRY: FRAGMENTATION_METHOD LOW-ENERGY CID
+AC$MASS_SPECTROMETRY: IONIZATION ESI
+AC$MASS_SPECTROMETRY: SCANNING 1 amu/sec (m/z = 20-2040)
+AC$MASS_SPECTROMETRY: SOURCE_TEMPERATURE 100C
+AC$CHROMATOGRAPHY: COLUMN_NAME TSK-GEL Amide-80 2.0 mm X 250 mm (TOSOH)
+AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE 40 C
+AC$CHROMATOGRAPHY: FLOW_GRADIENT 74/26 at 0 min, 50/50 at 60 min.
+AC$CHROMATOGRAPHY: FLOW_RATE 0.2 ml/min
+AC$CHROMATOGRAPHY: RETENTION_TIME 7.080 min
+AC$CHROMATOGRAPHY: SAMPLING_CONE 43.10 V
+AC$CHROMATOGRAPHY: SOLVENT CH3CN/H2O
+MS$FOCUSED_ION: DERIVATIVE_FORM C29H46N2O17
+MS$FOCUSED_ION: DERIVATIVE_MASS 694.27965
+MS$FOCUSED_ION: DERIVATIVE_TYPE ABEE (p-Aminobenzoic acid ethyl ester)
+MS$FOCUSED_ION: PRECURSOR_M/Z 695.00
+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+
+PK$SPLASH: splash10-0006-0002029000-bfe86aa7211a87812a06
+PK$NUM_PEAK: 8
+PK$PEAK: m/z int. rel.int.
+  370.8 3.277e5 366
+  371.4 3.036e4 34
+  532.0 5.812e4 65
+  532.6 2.982e5 333
+  533.3 5.196e4 58
+  694.1 4.564e5 510
+  694.8 8.939e5 999
+  695.4 5.537e4 62
b
diff -r 000000000000 -r fd5c0b39569a test-data/generic_format.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/generic_format.msp Wed Feb 05 12:30:06 2020 -0500
b
@@ -0,0 +1,123 @@
+NAME:  MZ:70.0659 | RT:1483 | XCMS_group:1 | file:1 | scan:NA | pid:NA
+PRECURSORMZ: 70.0658950805664
+Comment:
+Num Peaks: 8
+50.4781379699707 3487.4296875 4.61
+51.0193099975586 3390.96948242188 4.49
+53.0031509399414 10011.958984375 13.25
+53.5898513793945 4252.7880859375 5.63
+54.3787727355957 3541.5107421875 4.69
+69.0455169677734 9650.0107421875 12.77
+70.0660934448242 37168.609375 49.18
+82.9910659790039 4077.36694335938 5.39
+
+NAME:  MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA
+PRECURSORMZ: 72.0815277099609
+COMMENT:
+Num Peaks: 6
+51.773567199707 818.313903808594 10.98
+54.0346794128418 1247.91137695312 16.75
+54.6847991943359 967.616882324219 12.98
+56.050350189209 1780.01037597656 23.90
+58.4994125366211 975.196228027344 13.09
+72.0814056396484 1660.50390625 22.29
+
+NAME:  MZ:72.0815 | RT:1857 | scan:NA
+PRECURSORMZ: 72.08154296875
+COMMENT:
+Num Peaks: 4
+56.0504341125488 1838.78173828125 46.54
+59.9103507995605 701.556762695312 17.75
+63.7723731994629 650.224975585938 16.46
+72.0814590454102 760.228637695312 19.25
+
+NAME:  MZ:76.0400 | RT:1606 | XCMS_group:5 | file:1 | scan:NA
+PRECURSORMZ: 76.0400390625
+COMMENT:
+Num Peaks: 4
+53.2376174926758 3224.35571289062 25.41
+60.3291244506836 3193.19482421875 25.17
+73.7529830932617 3305.61401367188 26.05
+82.5309600830078 2965.41772460938 23.37
+
+NAME:  MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA
+PRECURSORMZ: 79.0218658447266
+COMMENT:
+Num Peaks: 7
+53.6282501220703 15316.7431640625 1.07
+59.967342376709 251727.734375 17.51
+61.0115814208984 80113.8046875 5.57
+62.9908714294434 93065.1015625 6.47
+63.9986305236816 950876.9375 66.13
+79.0219345092773 33032.984375 2.30
+95.4936447143555 13826.033203125 0.96
+
+NAME:  MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA
+PRECURSORMZ: 79.0218811035156
+COMMENT:
+Num Peaks: 5
+59.1125831604004 67799.1953125 3.10
+59.9673652648926 345613.1875 15.83
+62.9906845092773 117693.296875 5.39
+63.9986686706543 1585970.25 72.62
+80.5974655151367 66719.4609375 3.06
+
+NAME:  MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA
+PRECURSORMZ: 79.0218887329102
+COMMENT:
+Num Peaks: 12
+53.1700401306152 2441.47143554688 2.54
+55.1893730163574 2006.07958984375 2.08
+58.9013671875 2539.39086914062 2.64
+59.9673500061035 13423.1376953125 13.94
+61.0115776062012 4831.0986328125 5.02
+62.9908828735352 3668.52905273438 3.81
+63.9986190795898 54386.6640625 56.50
+73.8388671875 2330.30126953125 2.42
+78.5768051147461 2563.25 2.66
+79.0221328735352 2581.44604492188 2.68
+96.8009872436523 2530.70141601562 2.63
+99.6652908325195 2961.3095703125 3.08
+
+NAME:  MZ:79.9904 | RT:1284 | XCMS_group:11 | file:1 | scan:NA
+PRECURSORMZ: 79.9903564453125
+COMMENT:
+Num Peaks: 3
+51.6917915344238 584.212829589844 31.93
+53.0398750305176 649.807922363281 35.48
+97.3154754638672 596.341003417969 32.59
+
+NAME:  MZ:86.0607 | RT:1497 | XCMS_group:19 | file:1 | scan:NA
+PRECURSORMZ: 86.060661315918
+COMMENT:
+Num Peaks: 4
+53.0031318664551 9658.7939453125 60.81
+53.1939277648926 1998.81518554688 12.58
+80.3447494506836 2044.23645019531 12.87
+101.307479858398 2181.85522460938 13.73
+
+NAME:  MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA
+PRECURSORMZ: 86.0606307983398
+COMMENT:
+Num Peaks: 11
+52.6782836914062 1061.12646484375 3.59
+53.0032196044922 15176.8583984375 51.38
+53.1121788024902 1193.6044921875 4.039
+53.9984169006348 2790.28930664062 9.45
+54.0287094116211 999.250427246094 3.38
+56.7024726867676 1171.42797851562 3.96
+69.0346069335938 1878.03894042969 3.36
+72.9083633422852 1256.455078125 4.25
+74.0740356445312 1324.07055664062 4.48
+80.5324630737305 1329.61022949219 4.50
+91.0167770385742 1362.0029296875 4.61
+
+NAME:  MZ:86.0607 | RT:1500 | XCMS_group:19 | file:1 | scan:NA
+PRECURSORMZ: 86.0606536865234
+COMMENT:
+Num Peaks: 5
+53.0031509399414 29580.330078125 61.35
+55.3490409851074 4989.64990234375 10.35
+61.990592956543 4089.9619140625 8.48
+63.2290992736816 4168.97412109375 8.64
+67.6647109985352 5392.48779296875 11.18
b
diff -r 000000000000 -r fd5c0b39569a test-data/invalid_adduct.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/invalid_adduct.msp Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,13 @@
+NAME:  MZ:70.0659 | RT:1483 | XCMS_group:1 | file:1 | scan:NA | pid:NA
+PRECURSORMZ: 70.0658950805664
+ADDUCT: [M+INVALID_ADDUCT]+
+Comment:
+Num Peaks: 8
+50.4781379699707 3487.4296875 4.61
+51.0193099975586 3390.96948242188 4.49
+53.0031509399414 10011.958984375 13.25
+53.5898513793945 4252.7880859375 5.63
+54.3787727355957 3541.5107421875 4.69
+69.0455169677734 9650.0107421875 12.77
+70.0660934448242 37168.609375 49.18
+82.9910659790039 4077.36694335938 5.39
b
diff -r 000000000000 -r fd5c0b39569a test-data/massbank_format.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/massbank_format.txt Wed Feb 05 12:30:06 2020 -0500
b
@@ -0,0 +1,134 @@
+RECORD_TITLE:  MZ:70.0659 | RT:1483 | XCMS_group:1 | file:1 | scan:NA | pid:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 70.0658950805664
+COMMENT:
+PK$NUM_PEAK: 8
+PK$PEAK: m/z int. rel.int.
+50.4781379699707 3487.4296875 4.61
+51.0193099975586 3390.96948242188 4.49
+53.0031509399414 10011.958984375 13.25
+53.5898513793945 4252.7880859375 5.63
+54.3787727355957 3541.5107421875 4.69
+69.0455169677734 9650.0107421875 12.77
+70.0660934448242 37168.609375 49.18
+82.9910659790039 4077.36694335938 5.39
+
+RECORD_TITLE:  MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 72.0815277099609
+COMMENT:
+PK$NUM_PEAK: 6
+PK$PEAK: m/z int. rel.int.
+51.773567199707 818.313903808594 10.98
+54.0346794128418 1247.91137695312 16.75
+54.6847991943359 967.616882324219 12.98
+56.050350189209 1780.01037597656 23.90
+58.4994125366211 975.196228027344 13.09
+72.0814056396484 1660.50390625 22.29
+
+RECORD_TITLE:  MZ:72.0815 | RT:1857 | scan:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 72.08154296875
+COMMENT:
+PK$NUM_PEAK: 4
+PK$PEAK: m/z int. rel.int.
+56.0504341125488 1838.78173828125 46.54
+59.9103507995605 701.556762695312 17.75
+63.7723731994629 650.224975585938 16.46
+72.0814590454102 760.228637695312 19.25
+
+RECORD_TITLE:  MZ:76.0400 | RT:1606 | XCMS_group:5 | file:1 | scan:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 76.0400390625
+COMMENT:
+PK$NUM_PEAK: 4
+PK$PEAK: m/z int. rel.int.
+53.2376174926758 3224.35571289062 25.41
+60.3291244506836 3193.19482421875 25.17
+73.7529830932617 3305.61401367188 26.05
+82.5309600830078 2965.41772460938 23.37
+
+RECORD_TITLE:  MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 79.0218658447266
+COMMENT:
+PK$NUM_PEAK: 7
+PK$PEAK: m/z int. rel.int.
+53.6282501220703 15316.7431640625 1.07
+59.967342376709 251727.734375 17.51
+61.0115814208984 80113.8046875 5.57
+62.9908714294434 93065.1015625 6.47
+63.9986305236816 950876.9375 66.13
+79.0219345092773 33032.984375 2.30
+95.4936447143555 13826.033203125 0.96
+
+RECORD_TITLE:  MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 79.0218811035156
+COMMENT:
+PK$NUM_PEAK: 5
+PK$PEAK: m/z int. rel.int.
+59.1125831604004 67799.1953125 3.10
+59.9673652648926 345613.1875 15.83
+62.9906845092773 117693.296875 5.39
+63.9986686706543 1585970.25 72.62
+80.5974655151367 66719.4609375 3.06
+
+RECORD_TITLE:  MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 79.0218887329102
+COMMENT:
+PK$NUM_PEAK: 12
+PK$PEAK: m/z int. rel.int.
+53.1700401306152 2441.47143554688 2.54
+55.1893730163574 2006.07958984375 2.08
+58.9013671875 2539.39086914062 2.64
+59.9673500061035 13423.1376953125 13.94
+61.0115776062012 4831.0986328125 5.02
+62.9908828735352 3668.52905273438 3.81
+63.9986190795898 54386.6640625 56.50
+73.8388671875 2330.30126953125 2.42
+78.5768051147461 2563.25 2.66
+79.0221328735352 2581.44604492188 2.68
+96.8009872436523 2530.70141601562 2.63
+99.6652908325195 2961.3095703125 3.08
+
+RECORD_TITLE:  MZ:79.9904 | RT:1284 | XCMS_group:11 | file:1 | scan:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 79.9903564453125
+COMMENT:
+PK$NUM_PEAK: 3
+PK$PEAK: m/z int. rel.int.
+51.6917915344238 584.212829589844 31.93
+53.0398750305176 649.807922363281 35.48
+97.3154754638672 596.341003417969 32.59
+
+RECORD_TITLE:  MZ:86.0607 | RT:1497 | XCMS_group:19 | file:1 | scan:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 86.060661315918
+COMMENT:
+PK$NUM_PEAK: 4
+PK$PEAK: m/z int. rel.int.
+53.0031318664551 9658.7939453125 60.81
+53.1939277648926 1998.81518554688 12.58
+80.3447494506836 2044.23645019531 12.87
+101.307479858398 2181.85522460938 13.73
+
+RECORD_TITLE:  MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 86.0606307983398
+COMMENT:
+PK$NUM_PEAK: 11
+PK$PEAK: m/z int. rel.int.
+52.6782836914062 1061.12646484375 3.59
+53.0032196044922 15176.8583984375 51.38
+53.1121788024902 1193.6044921875 4.039
+53.9984169006348 2790.28930664062 9.45
+54.0287094116211 999.250427246094 3.38
+56.7024726867676 1171.42797851562 3.96
+69.0346069335938 1878.03894042969 3.36
+72.9083633422852 1256.455078125 4.25
+74.0740356445312 1324.07055664062 4.48
+80.5324630737305 1329.61022949219 4.50
+91.0167770385742 1362.0029296875 4.61
+
+RECORD_TITLE:  MZ:86.0607 | RT:1500 | XCMS_group:19 | file:1 | scan:NA
+MS$FOCUSED_ION: PRECURSOR_M/Z 86.0606536865234
+COMMENT:
+PK$NUM_PEAK: 5
+PK$PEAK: m/z int. rel.int.
+53.0031509399414 29580.330078125 61.35
+55.3490409851074 4989.64990234375 10.35
+61.990592956543 4089.9619140625 8.48
+63.2290992736816 4168.97412109375 8.64
+67.6647109985352 5392.48779296875 11.18
b
diff -r 000000000000 -r fd5c0b39569a test-data/metfrag_massbank.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metfrag_massbank.tabular Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,13 @@
+adduct name sample_name CompoundName ExplPeaks FormulasOfExplPeaks FragmenterScore FragmenterScore_Values Identifier InChI InChIKey InChIKey1 InChIKey2 InChIKey3 MaximumTreeDepth MolecularFormula MonoisotopicMass Name NoExplPeaks NumberPeaksUsed OfflineMetFusionScore SMILES Score
+[M+H]+ MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA 5_metfrag_result 61.0115814208984_84.2;62.9908714294434_97.8;63.9986305236816_999.0 61.0115814208984:[C2H6S-H]+;62.9908714294434:[CH3OS]+;63.9986305236816:[CH3OS]+H+ 282.1697651393817 622.0;272.0;272.0 679 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.014 test 3 5 0.21393611414493724 CS(=O)C 2.0
+[M+H]+ MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA 5_metfrag_result 61.0115814208984_84.2;62.9908714294434_97.8;63.9986305236816_999.0 61.0115814208984:[C2H6S-H]+;62.9908714294434:[CH3OS]+;63.9986305236816:[CH3OS]+H+ 282.1697651393817 622.0;272.0;272.0 HMDB0002151 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.0139355 Dimethyl sulfoxide 3 5 0.21393611414493724 CS(C)=O 2.0
+[M+H]+ MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA 6_metfrag_result 62.9906845092773_74.1;63.9986686706543_999.0 62.9906845092773:[CH3OS]+;63.9986686706543:[CH3OS]+H+ 245.34661903929023 272.0;272.0 679 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.014 test 2 4 0.14080908903149097 CS(=O)C 2.0
+[M+H]+ MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA 6_metfrag_result 62.9906845092773_74.1;63.9986686706543_999.0 62.9906845092773:[CH3OS]+;63.9986686706543:[CH3OS]+H+ 245.34661903929023 272.0;272.0 HMDB0002151 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.0139355 Dimethyl sulfoxide 2 4 0.14080908903149097 CS(C)=O 2.0
+[M+H]+ MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA 7_metfrag_result 61.0115776062012_88.7;62.9908828735352_67.4;63.9986190795898_999.0 61.0115776062012:[C2H6S-H]+;62.9908828735352:[CH3OS]+;63.9986190795898:[CH3OS]+H+ 273.21939470601694 622.0;272.0;272.0 679 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.014 test 3 8 0.21833426500502406 CS(=O)C 2.0
+[M+H]+ MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA 7_metfrag_result 61.0115776062012_88.7;62.9908828735352_67.4;63.9986190795898_999.0 61.0115776062012:[C2H6S-H]+;62.9908828735352:[CH3OS]+;63.9986190795898:[CH3OS]+H+ 273.21939470601694 622.0;272.0;272.0 HMDB0002151 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.0139355 Dimethyl sulfoxide 3 8 0.21833426500502406 CS(C)=O 2.0
+[M+H]+ MZ:86.0607 | RT:1497 | XCMS_group:19 | file:1 | scan:NA 9_metfrag_result NA NA 0.0 NA HMDB0002039 InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6) HNJBEVLQSNELDL-UHFFFAOYSA-N HNJBEVLQSNELDL UHFFFAOYSA N 2 C4H7NO 85.05276385 2-Pyrrolidinone 0 3 1.382021738987827 O=C1CCCN1 1.0
+[M+H]+ MZ:86.0607 | RT:1497 | XCMS_group:19 | file:1 | scan:NA 9_metfrag_result NA NA 0.0 NA HMDB0060427 InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3 MWFMGBPGAXYFAR-UHFFFAOYSA-N MWFMGBPGAXYFAR UHFFFAOYSA N 2 C4H7NO 85.05276385 Acetone cyanohydrin 0 3 1.259881333913003 CC(C)(O)C#N 0.9116219364506684
+[M+H]+ MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA 10_metfrag_result 53.9984169006348_183.7 53.9984169006348:[C2HNO-H]+ 28.14129547353099 796.0 HMDB0060427 InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3 MWFMGBPGAXYFAR-UHFFFAOYSA-N MWFMGBPGAXYFAR UHFFFAOYSA N 2 C4H7NO 85.05276385 Acetone cyanohydrin 1 10 1.5355220212517242 CC(C)(O)C#N 1.9157496211754683
+[M+H]+ MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA 10_metfrag_result NA NA 0.0 NA HMDB0002039 InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6) HNJBEVLQSNELDL-UHFFFAOYSA-N HNJBEVLQSNELDL UHFFFAOYSA N 2 C4H7NO 85.05276385 2-Pyrrolidinone 0 10 1.6767924176487323 O=C1CCCN1 1.0
+[M+H]+ MZ:86.0607 | RT:1500 | XCMS_group:19 | file:1 | scan:NA 11_metfrag_result NA NA 0.0 NA HMDB0002039 InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6) HNJBEVLQSNELDL-UHFFFAOYSA-N HNJBEVLQSNELDL UHFFFAOYSA N 2 C4H7NO 85.05276385 2-Pyrrolidinone 0 5 1.3746691837774077 O=C1CCCN1 1.0
+[M+H]+ MZ:86.0607 | RT:1500 | XCMS_group:19 | file:1 | scan:NA 11_metfrag_result NA NA 0.0 NA HMDB0060427 InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3 MWFMGBPGAXYFAR-UHFFFAOYSA-N MWFMGBPGAXYFAR UHFFFAOYSA N 2 C4H7NO 85.05276385 Acetone cyanohydrin 0 5 1.2539223673403028 CC(C)(O)C#N 0.9121630004789162
b
diff -r 000000000000 -r fd5c0b39569a test-data/metfrag_msp.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metfrag_msp.tabular Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,13 @@
+adduct name sample_name CompoundName ExplPeaks FormulasOfExplPeaks FragmenterScore FragmenterScore_Values Identifier InChI InChIKey InChIKey1 InChIKey2 InChIKey3 MaximumTreeDepth MolecularFormula MonoisotopicMass Name NoExplPeaks NumberPeaksUsed OfflineMetFusionScore SMILES Score
+[M+H]+ MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA 5_metfrag_result 61.0115814208984_84.2;62.9908714294434_97.8;63.9986305236816_999.0 61.0115814208984:[C2H6S-H]+;62.9908714294434:[CH3OS]+;63.9986305236816:[CH3OS]+H+ 282.1697651393817 622.0;272.0;272.0 679 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.014 test 3 5 0.21393611414493724 CS(=O)C 2.0
+[M+H]+ MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA 5_metfrag_result 61.0115814208984_84.2;62.9908714294434_97.8;63.9986305236816_999.0 61.0115814208984:[C2H6S-H]+;62.9908714294434:[CH3OS]+;63.9986305236816:[CH3OS]+H+ 282.1697651393817 622.0;272.0;272.0 HMDB0002151 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.0139355 Dimethyl sulfoxide 3 5 0.21393611414493724 CS(C)=O 2.0
+[M+H]+ MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA 6_metfrag_result 62.9906845092773_74.1;63.9986686706543_999.0 62.9906845092773:[CH3OS]+;63.9986686706543:[CH3OS]+H+ 245.34661903929023 272.0;272.0 679 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.014 test 2 4 0.14080908903149097 CS(=O)C 2.0
+[M+H]+ MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA 6_metfrag_result 62.9906845092773_74.1;63.9986686706543_999.0 62.9906845092773:[CH3OS]+;63.9986686706543:[CH3OS]+H+ 245.34661903929023 272.0;272.0 HMDB0002151 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.0139355 Dimethyl sulfoxide 2 4 0.14080908903149097 CS(C)=O 2.0
+[M+H]+ MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA 7_metfrag_result 61.0115776062012_88.7;62.9908828735352_67.4;63.9986190795898_999.0 61.0115776062012:[C2H6S-H]+;62.9908828735352:[CH3OS]+;63.9986190795898:[CH3OS]+H+ 273.21939470601694 622.0;272.0;272.0 679 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.014 test 3 8 0.21833426500502406 CS(=O)C 2.0
+[M+H]+ MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA 7_metfrag_result 61.0115776062012_88.7;62.9908828735352_67.4;63.9986190795898_999.0 61.0115776062012:[C2H6S-H]+;62.9908828735352:[CH3OS]+;63.9986190795898:[CH3OS]+H+ 273.21939470601694 622.0;272.0;272.0 HMDB0002151 InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 IAZDPXIOMUYVGZ-UHFFFAOYSA-N IAZDPXIOMUYVGZ UHFFFAOYSA N 2 C2H6OS 78.0139355 Dimethyl sulfoxide 3 8 0.21833426500502406 CS(C)=O 2.0
+[M+H]+ MZ:86.0607 | RT:1497 | XCMS_group:19 | file:1 | scan:NA 9_metfrag_result NA NA 0.0 NA HMDB0002039 InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6) HNJBEVLQSNELDL-UHFFFAOYSA-N HNJBEVLQSNELDL UHFFFAOYSA N 2 C4H7NO 85.05276385 2-Pyrrolidinone 0 3 1.382021738987827 O=C1CCCN1 1.0
+[M+H]+ MZ:86.0607 | RT:1497 | XCMS_group:19 | file:1 | scan:NA 9_metfrag_result NA NA 0.0 NA HMDB0060427 InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3 MWFMGBPGAXYFAR-UHFFFAOYSA-N MWFMGBPGAXYFAR UHFFFAOYSA N 2 C4H7NO 85.05276385 Acetone cyanohydrin 0 3 1.259881333913003 CC(C)(O)C#N 0.9116219364506684
+[M+H]+ MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA 10_metfrag_result 53.9984169006348_183.7 53.9984169006348:[C2HNO-H]+ 28.14129547353099 796.0 HMDB0060427 InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3 MWFMGBPGAXYFAR-UHFFFAOYSA-N MWFMGBPGAXYFAR UHFFFAOYSA N 2 C4H7NO 85.05276385 Acetone cyanohydrin 1 10 1.5355220212517242 CC(C)(O)C#N 1.9157496211754683
+[M+H]+ MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA 10_metfrag_result NA NA 0.0 NA HMDB0002039 InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6) HNJBEVLQSNELDL-UHFFFAOYSA-N HNJBEVLQSNELDL UHFFFAOYSA N 2 C4H7NO 85.05276385 2-Pyrrolidinone 0 10 1.6767924176487323 O=C1CCCN1 1.0
+[M+H]+ MZ:86.0607 | RT:1500 | XCMS_group:19 | file:1 | scan:NA 11_metfrag_result NA NA 0.0 NA HMDB0002039 InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6) HNJBEVLQSNELDL-UHFFFAOYSA-N HNJBEVLQSNELDL UHFFFAOYSA N 2 C4H7NO 85.05276385 2-Pyrrolidinone 0 5 1.3746691837774077 O=C1CCCN1 1.0
+[M+H]+ MZ:86.0607 | RT:1500 | XCMS_group:19 | file:1 | scan:NA 11_metfrag_result NA NA 0.0 NA HMDB0060427 InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3 MWFMGBPGAXYFAR-UHFFFAOYSA-N MWFMGBPGAXYFAR UHFFFAOYSA N 2 C4H7NO 85.05276385 Acetone cyanohydrin 0 5 1.2539223673403028 CC(C)(O)C#N 0.9121630004789162
b
diff -r 000000000000 -r fd5c0b39569a test-data/winter_pos.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/winter_pos.msp Wed Feb 05 12:30:06 2020 -0500
[
@@ -0,0 +1,34 @@
+NAME: pos_27_winter_Marpol_27_2.E.3_01_17272:1
+AlignmentID: 1
+RETENTIONTIME: 720.852
+PRECURSORMZ: 149.13513692
+METABOLITENAME: Unknown
+ADDUCTIONNAME: [M+H]+
+NumPeaks: 3
+121.099329887276 182
+149.133267159026 510
+150.14016979533 114
+
+NAME: pos_27_winter_Marpol_27_2.E.3_01_17272:2
+AlignmentID: 2
+RETENTIONTIME: 650.574
+PRECURSORMZ: 151.11569214
+METABOLITENAME: Unknown
+ADDUCTIONNAME: [M+H]+
+NumPeaks: 3
+136.090178471023 168
+151.110697295085 1052
+152.117423567352 136
+
+NAME: pos_27_winter_Marpol_27_2.E.3_01_17272:3
+AlignmentID: 3
+RETENTIONTIME: 42.174
+PRECURSORMZ: 166.09213257
+METABOLITENAME: Unknown
+ADDUCTIONNAME: [M+H]+
+NumPeaks: 5
+121.067062911877 262
+131.049505932606 146
+137.060202192821 531
+149.059561491169 254
+166.08537028381 545
b
diff -r 000000000000 -r fd5c0b39569a test-data/winter_pos.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/winter_pos.tabular Wed Feb 05 12:30:06 2020 -0500
[
b'@@ -0,0 +1,45 @@\n+adduct\tname\tsample_name\tExplPeaks\tFormulasOfExplPeaks\tFragmenterScore\tFragmenterScore_Values\tIUPACName\tIdentifier\tInChI\tInChIKey\tInChIKey1\tInChIKey2\tMaximumTreeDepth\tMolecularFormula\tMonoisotopicMass\tNoExplPeaks\tNumberPeaksUsed\tOfflineMetFusionScore\tSMILES\tScore\tXlogP3\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\tdimethyl(dipropyl)-lambda4-sulfane\t71774044\tInChI=1S/C8H20S/c1-5-7-9(3,4)8-6-2/h5-8H2,1-4H3\tOPMSGHPOQIQQRS-UHFFFAOYSA-N\tOPMSGHPOQIQQRS\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.37031226614982965\tCCCS(C)(C)CCC\t1.0\t2.7\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\tbutyl-ethyl-dimethyl-lambda4-sulfane\t90984195\tInChI=1S/C8H20S/c1-5-7-8-9(3,4)6-2/h5-8H2,1-4H3\tHCGXHQGSWBNABQ-UHFFFAOYSA-N\tHCGXHQGSWBNABQ\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.37031226614982965\tCCCCS(C)(C)CC\t1.0\t2.5\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\ttrimethyl(3-methylbutyl)-lambda4-sulfane\t118334050\tInChI=1S/C8H20S/c1-8(2)6-7-9(3,4)5/h8H,6-7H2,1-5H3\tDFXVYXYADDYUJD-UHFFFAOYSA-N\tDFXVYXYADDYUJD\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.3692650195260169\tCC(C)CCS(C)(C)C\t0.9971719904536216\t2.6\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\tbutyl-trimethyl-methylidene-lambda6-sulfane\t123293059\tInChI=1S/C8H20S/c1-6-7-8-9(2,3,4)5/h2,6-8H2,1,3-5H3\tVKPVIYIHVYLQST-UHFFFAOYSA-N\tVKPVIYIHVYLQST\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.3684934475101417\tCCCCS(=C)(C)(C)C\t0.9950884191371828\t2.8\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\ttrimethyl(pentyl)-lambda4-sulfane\t123234955\tInChI=1S/C8H20S/c1-5-6-7-8-9(2,3)4/h5-8H2,1-4H3\tDOMKJZGKNOYRJJ-UHFFFAOYSA-N\tDOMKJZGKNOYRJJ\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.36833824834252354\tCCCCCS(C)(C)C\t0.9946693156350715\t2.7\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E
.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\ttrimethyl(2-methylbutyl)-lambda4-sulfane\t89554176\tInChI=1S/C8H20S/c1-6-8(2)7-9(3,4)5/h8H,6-7H2,1-5H3\tVYPWEAXBOOOKCI-UHFFFAOYSA-N\tVYPWEAXBOOOKCI\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.3670899392001932\tCCC(C)CS(C)(C)C\t0.9912983521093177\t2.6\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\tethyl-dimethyl-(2-methylpropyl)-lambda4-sulfane\t123581531\tInChI=1S/C8H20S/c1-6-9(4,5)7-8(2)3/h8H,6-7H2,1-5H3\tHCQWOWHBTFNYPP-UHFFFAOYSA-N\tHCQWOWHBTFNYPP\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.3670899392001932\tCCS(C)(C)CC(C)C\t0.9912983521093177\t2.6\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\tdiethyl-methyl-propyl-lambda4-sulfane\t87264764\tInChI=1S/C8H20S/c1-5-8-9(4,6-2)7-3/h5-8H2,1-4H3\tFEBVEQKYAVNHNG-UHFFFAOYSA-N\tFEBVEQKYAVNHNG\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.3665198706303188\tCCCS(C)(CC)CC\t0.9897589254632564\t2.5\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\tdimethyl-propan-2-yl-propyl-lambda4-sulfane\t123984437\tInChI=1S/C8H20S/c1-6-7-9(4,5)8(2)3/h8H,6-7H2,1-5H3\tYKCWUJTZJUTSRN-UHFFFAOYSA-N\tYKCWUJTZJUTSRN\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.36642602760587445\tCCCS(C)(C)C(C)C\t0.9895055095410131\t2.6\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\ttrimethyl(pentan-2-yl)-lambda4-sulfane\t123872005\tInChI=1S/C8H20S/c1-6-7-8(2)9(3,4)5/h8H,6-7H2,1-5H3\tKBFUVSMTQRNXGQ-UHFFFAOYSA-N\tKBFUVSMTQRNXGQ\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.366385855541442\tCCCC(C)S(C)(C)C\t0.9893970279483019\t2.6\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\ttert-butyl-ethyl-dimethyl-lambda4-sulfane\t123948136\tInChI=1S/C8H20S/c1-7-9(5,6)8(2,3)4/h7H2,1-6H3\tZYNXVEVNBFJHMI-UHFFFAOYSA-N\tZYNXVEVNBFJHMI\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.3656746377878431\tCCS(C)(C)C(C)(C)C\t0.9874764387088648\t2
.3\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:1\t1_metfrag_result\tNA\tNA\t0.0\tNA\t2,2-dimethylpropyl(trimethyl)-lambda4-sulfane\t123803452\tInChI=1S/C8H20S/c1-8(2,3)7-9(4,5)6/h7H2,1-6H3\tZHZXMNSILACQJD-UHFFFAOYSA-N\tZHZXMNSILACQJD\tUHFFFAOYSA\t2\tC8H20S\t148.128572\t0\t1\t0.36455776513'..b"SHMRJFADGKMKZ-UHFFFAOYSA-N\tQSHMRJFADGKMKZ\tUHFFFAOYSA\t2\tC7H18OS\t150.107836\t1\t1\t0.022740280431331264\tCCS(CC)(CC)OC\t1.8754932764394985\t2.1\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:2\t2_metfrag_result\tNA\tNA\t0.0\tNA\t6-hydroxy-1-aza-6-boranuidabicyclo[4.4.0]deca-3,8-diene\t139044256\tInChI=1S/C8H13BNO/c11-9-5-1-3-7-10(9)8-4-2-6-9/h1-4,11H,5-8H2/q-1\tDIGXBCMFCAGNEI-UHFFFAOYSA-N\tDIGXBCMFCAGNEI\tUHFFFAOYSA\t2\tC8H13BNO\t150.109019\t0\t1\t0.023090160119030666\t[B-]12(CC=CCN1CC=CC2)O\t1.0\tNA\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:2\t2_metfrag_result\tNA\tNA\t0.0\tNA\tspiro[1-azoniabicyclo[2.2.2]octane-3,2'-oxirane]-1-ylboron\t22620779\tInChI=1S/C8H13BNO/c9-10-3-1-7(2-4-10)8(5-10)6-11-8/h7H,1-6H2/q+1\tPLFWYAVASHIDNJ-UHFFFAOYSA-N\tPLFWYAVASHIDNJ\tUHFFFAOYSA\t2\tC8H13BNO\t150.109019\t0\t1\t0.023087816756411212\t[B][N+]12CCC(CC1)C3(C2)CO3\t0.9998985125002436\tNA\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:2\t2_metfrag_result\tNA\tNA\t0.0\tNA\t(4-ethynylpiperidin-1-yl)-methoxyboron\t88891894\tInChI=1S/C8H13BNO/c1-3-8-4-6-10(7-5-8)9-11-2/h1,8H,4-7H2,2H3\tKMQRCHOIXNGVRQ-UHFFFAOYSA-N\tKMQRCHOIXNGVRQ\tUHFFFAOYSA\t2\tC8H13BNO\t150.109019\t0\t1\t0.02306511967018231\t[B](N1CCC(CC1)C#C)OC\t0.9989155359374179\tNA\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:2\t2_metfrag_result\tNA\tNA\t0.0\tNA\t(6-propan-2-yl-4H-pyridin-1-ium-3-ylidene)borinic 
acid\t140680971\tInChI=1S/C8H12BNO/c1-6(2)8-4-3-7(9-11)5-10-8/h4-6,11H,3H2,1-2H3/p+1\tUBGKRZUUANICLB-UHFFFAOYSA-O\tUBGKRZUUANICLB\tUHFFFAOYSA\t2\tC8H13BNO\t150.109019\t0\t1\t0.02303376056789623\tB(=C1CC=C([NH+]=C1)C(C)C)O\t0.9975574205270257\tNA\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:2\t2_metfrag_result\tNA\tNA\t0.0\tNA\t(1-propyl-3H-pyridin-1-ium-4-ylidene)borinic acid\t68349908\tInChI=1S/C8H13BNO/c1-2-5-10-6-3-8(9-11)4-7-10/h3,6-7,11H,2,4-5H2,1H3/q+1\tSQSPCGRPLQBVME-UHFFFAOYSA-N\tSQSPCGRPLQBVME\tUHFFFAOYSA\t2\tC8H13BNO\t150.109019\t0\t1\t0.02302038444191081\tB(=C1CC=[N+](C=C1)CCC)O\t0.9969781206903652\tNA\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:2\t2_metfrag_result\tNA\tNA\t0.0\tNA\t(1-propan-2-yl-3H-pyridin-1-ium-4-ylidene)borinic acid\t68196356\tInChI=1S/C8H13BNO/c1-7(2)10-5-3-8(9-11)4-6-10/h3,5-7,11H,4H2,1-2H3/q+1\tMTVNCMONVSIFOD-UHFFFAOYSA-N\tMTVNCMONVSIFOD\tUHFFFAOYSA\t2\tC8H13BNO\t150.109019\t0\t1\t0.02301995709621174\tB(=C1CC=[N+](C=C1)C(C)C)O\t0.9969596129928495\tNA\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:2\t2_metfrag_result\tNA\tNA\t0.0\tNA\t1-(difluoromethyl)-1-methylpiperidin-1-ium\t24750697\tInChI=1S/C7H14F2N/c1-10(7(8)9)5-3-2-4-6-10/h7H,2-6H2,1H3/q+1\tSIBIOKOZJHIJMS-UHFFFAOYSA-N\tSIBIOKOZJHIJMS\tUHFFFAOYSA\t2\tC7H14F2N\t150.109431\t0\t1\t0.023002603052633866\tC[N+]1(CCCCC1)C(F)F\t0.9962080355465082\t2.1\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:2\t2_metfrag_result\tNA\tNA\t0.0\tNA\t[(2S)-1-(3,3-difluorocyclobutyl)propan-2-yl]azanium\t144763422\tInChI=1S/C7H13F2N/c1-5(10)2-6-3-7(8,9)4-6/h5-6H,2-4,10H2,1H3/p+1/t5-/m0/s1\tPFPVDGBOJWFZCE-YFKPBYRVSA-O\tPFPVDGBOJWFZCE\tYFKPBYRVSA\t2\tC7H14F2N\t150.109431\t0\t1\t0.022930394247447423\tC[C@@H](CC1CC(C1)(F)F)[NH3+]\t0.9930807811310254\t1.5\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:3\t3_metfrag_result\tNA\tNA\t0.0\tNA\t[1-(2,2-difluoroethylcarbamoyl)cyclopropyl]azanium\t140645036\tInChI=1S/C6H10F2N2O/c7-4(8)3-10-5(11)6(9)1-2-6/h4H,1-3,9H2,(H,10,11)/p+1\tVBDLBVWQ
UBMPID-UHFFFAOYSA-O\tVBDLBVWQUBMPID\tUHFFFAOYSA\t2\tC6H11F2N2O\t165.083944\t0\t4\t1.5659197717037268\tC1CC1(C(=O)NCC(F)F)[NH3+]\t1.0\t-0.2\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:3\t3_metfrag_result\tNA\tNA\t0.0\tNA\t2-methylphosphanyl-N-(2-methylphosphanylethyl)ethanamine\t5366868\tInChI=1S/C6H17NP2/c1-8-5-3-7-4-6-9-2/h7-9H,3-6H2,1-2H3\tRVTLJXOPJDPRHE-UHFFFAOYSA-N\tRVTLJXOPJDPRHE\tUHFFFAOYSA\t2\tC6H17NP2\t165.083624\t0\t4\t1.4513834802368626\tCPCCNCCPC\t0.9268568584824444\t-0.8\r\n+[M+H]+\tpos_27_winter_Marpol_27_2.E.3_01_17272:3\t3_metfrag_result\tNA\tNA\t0.0\tNA\t[6-(dimethylamino)pyridin-3-yl]oxy-hydroxyboron\t58378282\tInChI=1S/C7H10BN2O2/c1-10(2)7-4-3-6(5-9-7)12-8-11/h3-5,11H,1-2H3\tQHOASYSLTRWFLE-UHFFFAOYSA-N\tQHOASYSLTRWFLE\tUHFFFAOYSA\t2\tC7H10BN2O2\t165.083533\t0\t4\t1.4368216184360907\t[B](O)OC1=CN=C(C=C1)N(C)C\t0.9175576197449906\tNA\r\n"