Mercurial > repos > computational-metabolomics > sirius_csifingerid
changeset 4:8fb51147d15e draft
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
author | computational-metabolomics |
---|---|
date | Fri, 04 Feb 2022 10:09:40 +0000 |
parents | 4cbfd3d0a4c4 |
children | 57c4e7421085 |
files | sirius_csifingerid.py sirius_csifingerid.xml test-data/CCMSLIB00000578155_result.tsv test-data/cf_annotation_generic_msp_result.tsv test-data/cf_canopus_generic_msp_result.tsv |
diffstat | 5 files changed, 39 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/sirius_csifingerid.py Wed Feb 02 17:29:46 2022 +0000 +++ b/sirius_csifingerid.py Fri Feb 04 10:09:40 2022 +0000 @@ -390,16 +390,18 @@ ad = paramds[fn.split(os.sep)[-2]]['additional_details'] for line in reader: - if 'rank' in line and \ - 0 < int(rank_filter) < int(line['rank']): + if 'rank' in line \ + and 0 < int(rank_filter) < int(line['rank']): # filter out those annotations greater than rank filter # If rank_filter is zero then skip continue - if 'ConfidenceScore' in line \ - and 0 < int(confidence_filter) < int(line['rank']): - # filter out those annotations greater than rank filter - # If rank_filter is zero then skip + if ('ConfidenceScore' in line + and 0 < float(confidence_filter) + and float(line['ConfidenceScore']) < + float(confidence_filter)): + # filter out those annotations that are less than + # the confidence filter value continue line.update(ad) @@ -414,13 +416,13 @@ os.system(s2) -concat_output('canopus_summary.tsv', - args.canopus_result_pth, +concat_output('compound_identifications.tsv', + args.annotations_result_pth, args.rank_filter, args.confidence_filter, args.backwards_compatible) -concat_output('compound_identifications.tsv', - args.annotations_result_pth, +concat_output('canopus_summary.tsv', + args.canopus_result_pth, 0, 0, False)
--- a/sirius_csifingerid.xml Wed Feb 02 17:29:46 2022 +0000 +++ b/sirius_csifingerid.xml Fri Feb 04 10:09:40 2022 +0000 @@ -1,5 +1,5 @@ <tool id="sirius_csifingerid" name="SIRIUS-CSI:FingerID" - version="4.9.8+galaxy0" profile="21.01"> + version="4.9.8+galaxy0" profile="19.05"> <description>is used to identify metabolites using single and tandem mass spectrometry</description> <requirements> @@ -61,13 +61,13 @@ label="MSP file (output from Create MSP tool)" /> <param argument="--database" type="select" label="Select SIRIUS-CSI:FingerID Database" > - <option value="PubChem" >PubChem</option> - <option value="hmdb">HMDB</option> - <option value="kegg">KEGG</option> - <option value="knapsack">KNApSAcK</option> - <option value="biocyc">BioCyc</option> - <option selected="true" value="bio">Bio (all biological)</option> - <option value="all">All use all databases</option> + <option value="PUBCHEM" >PubChem</option> + <option value="HMDB">HMDB</option> + <option value="KEGG">KEGG</option> + <option value="KNAPSACK">KNApSAcK</option> + <option value="BIOCYC">BioCyc</option> + <option selected="true" value="BIO">Bio (all biological)</option> + <option value="ALL">All (use all databases)</option> </param> <param argument="--ppm_max" type="integer" value="10" min="0" label="Mass deviation of the fragment peaks in ppm" /> @@ -143,13 +143,13 @@ label="Only show the top ranked annotations less than or equal to this value (default to show all annotations)"/> - <param argument="--confidence_filter" type="integer" value="0" + <param argument="--confidence_filter" type="float" value="0" label="Only show annotations greater than or or equal to this value (default to show all annotations)"/> <param argument="--backwards_compatible" type="boolean" checked="false" label="Makes the outputs compatible with annotation workflows that used the old output from - SIRIUS:CSI:FingerID v4.0.1"/> + SIRIUS-CSI:FingerID v4.0.1"/> </inputs> <outputs> <data name="canopus_results" format="tsv" label="${tool.name} on ${on_string}: CANOPUS" @@ -209,6 +209,13 @@ <output name="annotation_results" file="bc_annotation_CCMSLIB00000578155_result.tsv"/> <output name="canopus_results" file="bc_canopus_CCMSLIB00000578155_result.tsv"/> </test> + <test> + <!-- Test confidence filter --> + <param name="input" value="generic.msp" ftype="msp"/> + <param name="confidence_filter" value="0.7"/> + <output name="annotation_results" file="cf_annotation_generic_msp_result.tsv"/> + <output name="canopus_results" file="cf_canopus_generic_msp_result.tsv"/> + </test> </tests> <help> ----------------
--- a/test-data/CCMSLIB00000578155_result.tsv Wed Feb 02 17:29:46 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -name adduct inchikey2D inchi molecularFormula rank score name smiles xlogp pubchemids links -D-GLUCOSE-6-PHOSPHATE [M-H]- NBSCHQHZLSJFNQ InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13) C6H13O9P 1 -2956.17597 D-GLUCOSE-6-PHOSPHATE C(C1C(C(C(C(O)O1)O)O)O)OP(=O)(O)O 208;5958;65127;439198;439284;439404;439427;440100;447096;449526;4178491;4459709;9817215;9859975;10038266;10332946;10422797;10422798;10848963;11499884;11536233;11651816;11651817;11701643;12314997;12598269;16219407;21604864;21604865;23421197;23421199;23421200;24802166;25200774;25244236;42609823;44589902;44629605;46936284;51351673;51351674;59660207;59660208;66804219;70828590;71048769;72200063;89530481;89533633;90087729;90657928;92043642;92144442;92331699;92450038;100983220;101251820;102072969;124302956;124303605 HMDB:(3498);KNApSAcK:(7307);Natural Products:(UNPD119019 UNPD208877);CHEBI:(47944 136602 41076 4141 17665 134068 91004 61567 61667 4170 61548 58735 17719 58225 60332 58247 48066);KEGG:(C02962 C03735 C00275 C02965 C01172 C00092 C00668 C01113);Plantcyc:(MANNOSE-6P CPD-15711 CPD-15712 D-HEXOSE-6-PHOSPHATE GLC-6-P ALPHA-GLC-6-P CPD-1241);YMDB:(2311);Biocyc:(CPD-15712 CPD-1241) -D-GLUCOSE-6-PHOSPHATE [M-H]- HXXFSFRBOHSIMQ InChI=1S/C6H13O9P/c7-1-2-3(8)4(9)5(10)6(14-2)15-16(11,12)13/h2-10H,1H2,(H2,11,12,13) C6H13O9P 2 -2968.893 D-GLUCOSE-6-PHOSPHATE C(C1C(C(C(C(O1)OP(=O)(O)O)O)O)O)O 466;65533;122250;123912;439165;439279;439426;439995;644175;1549075;1549076;3034296;3246168;3551220;5702593;6560208;6560209;7091266;7098639;10084035;11108064;11299931;11536234;11557960;11586967;11637475;11701642;12773693;12773694;15720053;20706002;21120286;22298591;23421196;23421198;23724605;23724607;24802153;24802168;25134172;25244208;25245607;26470622;26470623;26470920;26470921;26470922;40467866;40467867;40467868;40473131;40473132;42609824;44224049;45109780;46173227;46173228;46878478;51397481;57349329;57466719;57616986;57616987;58434201;59383287;59973641;59973642;59985133;60023647;67062884;67062905;67062913;67062918;67794900;68298161;68937634;70124502;70837719;71122101;71728461;88462985;90472756;91265893;91658980;101503810;101747832;101747833;121494054;122545953;125293590;125293595;125293596;125293598 HMDB:(62705);KNApSAcK:(7389);Natural Products:(UNPD85752 UNPD57928 UNPD186485);CHEBI:(16077 17973 75522 24588 53072 58601 57684 60389 58336 60465 53025 16326 80181 58908 18205 16218 58409 57629);KEGG:(C15924 C15926 C01171 C03384 C00636 C00446 C01002 C00103 C00663);Plantcyc:(CPD-9828 GALACTOSE-1P GLC-1-P MANNOSE-1P CPDQT-4 CPD-448 CPD4FS-5);YMDB:(970);Biocyc:(CPD4FS-5) -D-GLUCOSE-6-PHOSPHATE [M-H]- BGWGXPAPYGQALX InChI=1S/C6H13O9P/c7-2-6(10)5(9)4(8)3(15-6)1-14-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13) C6H13O9P 3 -2996.82333 D-GLUCOSE-6-PHOSPHATE C(C1C(C(C(CO)(O)O1)O)O)OP(=O)(O)O 719;124155;439160;439396;440641;440970;444848;5083448;9543488;15648788;16760431;20843252;21604862;21604863;23421195;24802142;25201714;25245410;42609822;46174048;46878483;52916945;86308139;91746169;92024282;102322321;122174030;124300900;124350439;124524514;124579643 HMDB:(6873);KNApSAcK:(7305);Natural Products:(UNPD153056);CHEBI:(57634 4251 81499 61553 58695 16084 6307 45804 47946 58926 61527);KEGG:(C06312 C18096 C01097 C05345 C00085);Plantcyc:(TAGATOSE-6-PHOSPHATE FRUCTOSE-6P);Biocyc:(L-TAGATOSE-6-PHOSPHATE) -D-GLUCOSE-6-PHOSPHATE [M-H]- PMTUDJVZIGZBIX InChI=1S/C6H13O9P/c7-1-3-4(9)5(10)6(2-8,14-3)15-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13) C6H13O9P 4 -2999.57091 D-GLUCOSE-6-PHOSPHATE C(C1C(C(C(CO)(O1)OP(=O)(O)O)O)O)O 193537;5176477;6398638;15703397;16069990;21126112;21126113;57357663;99639213;124202606 HMDB:(6800);CHEBI:(27884 57267 12350);KEGG:(C03267);YMDB:(878);Biocyc:(FRUCTOSE-2-PHOSPHATE) -D-GLUCOSE-6-PHOSPHATE [M-H]- RHKKZBWRNHGJEZ InChI=1S/C6H13O9P/c7-1-3-4(8)5(9)6(10,15-3)2-14-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13) C6H13O9P 5 -3000.17545 D-GLUCOSE-6-PHOSPHATE C(C1C(C(C(COP(=O)(O)O)(O)O1)O)O)O 717;439394;10400369;21627880;23421194;25244216;51397484;52916944;90658050;90658051;90659357;90659358;92209483;97041850 HMDB:(1076);KNApSAcK:(19676);CHEBI:(37515 58674);KEGG:(C01094);Plantcyc:(FRU1P);Biocyc:(CPD-16154 CPD-16158 CPD-16159) -D-GLUCOSE-6-PHOSPHATE [M-H]- INAPMGSXUVUWAF InChI=1S/C6H13O9P/c7-1-2(8)4(10)6(5(11)3(1)9)15-16(12,13)14/h1-11H,(H2,12,13,14) C6H13O9P 6 -3061.86763 D-GLUCOSE-6-PHOSPHATE C1(C(C(C(C(C1O)O)OP(=O)(O)O)O)O)O 9;107737;160886;161368;440043;440194;4449629;5288642;5288700;6323385;7098643;10659045;13072112;18654477;25200523;25200860;25203035;35027167;53924828;59824613;59824614;59824615;59824616;101661021;121400595;121403401 HMDB:(6814);KNApSAcK:(7483);Natural Products:(UNPD107543 UNPD92136 UNPD189294);CHEBI:(58469 18169 62383 37493 18384 58433 18297 64841 58401 64838 84142 84141);KEGG:(C03546 C06155 C01177 C04006);Plantcyc:(1-L-MYO-INOSITOL-1-P D-MYO-INOSITOL-1-MONOPHOSPHATE CPD-6701 CPD-6702 CPD-6746 CPD-9887 D-MYO-INOSITOL-4-PHOSPHATE);YMDB:(2322);Biocyc:(D-MYO-INOSITOL-4-PHOSPHATE CPD-6701 CPD-6702 CPD-6746) -D-GLUCOSE-6-PHOSPHATE [M-H]- GSXOAOHZAIYLCY InChI=1S/C6H13O9P/c7-1-3(8)5(10)6(11)4(9)2-15-16(12,13)14/h4-7,9-11H,1-2H2,(H2,12,13,14) C6H13O9P 7 -3108.21629 D-GLUCOSE-6-PHOSPHATE C(C(=O)C(C(C(COP(=O)(O)O)O)O)O)O 603;69507;151197;5459902;5459952;6602428;20111689;20111690;21114947;21872891;23615358;40467872;40467873;46943428;50909805;87615581 HMDB:(124);KNApSAcK:(19683);Natural Products:(UNPD94448);CHEBI:(57579 61519 134284 15946 15845 61559 47947 134283);Plantcyc:(D-ALLULOSE-6-PHOSPHATE);YMDB:(78);Biocyc:(CPD-15828 CPD-15826 D-ALLULOSE-6-PHOSPHATE) -D-GLUCOSE-6-PHOSPHATE [M-H]- ZKLLSNQJRLJIGT InChI=1S/C6H13O9P/c7-1-3(8)5(10)6(11)4(9)2-15-16(12,13)14/h3,5-8,10-11H,1-2H2,(H2,12,13,14) C6H13O9P 8 -3116.86489 D-GLUCOSE-6-PHOSPHATE C(C(C(C(C(=O)COP(=O)(O)O)O)O)O)O 481;65246;151033;439837;440076;6101730;11129032;11737049;14844438;20111955;21145035;23615304;54551858;54551860;54551861;54551863;91010818 HMDB:(60467);KNApSAcK:(19630);CHEBI:(38342 218 18105);KEGG:(C03654 C02888);YMDB:(655);Biocyc:(CPD-15970 CPD0-1116 CPD-531)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cf_annotation_generic_msp_result.tsv Fri Feb 04 10:09:40 2022 +0000 @@ -0,0 +1,4 @@ +name adduct rank formulaRank #adducts #predictedFPs ConfidenceScore CSI:FingerIDScore ZodiacScore SiriusScore molecularFormula adduct InChIkey2D InChI name smiles xlogp pubchemids links dbflags ionMass retentionTimeInSeconds id +MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA [M+H]+ 1 1 1 1 0.8516968844490043 -30.428891568077617 N/A 19.249416937845428 C2H6OS [M+H]+ IAZDPXIOMUYVGZ InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA CS(=O)C -0.600000024 679;75151;12206145;12264368;10103116;18594457;20151975;20719893;21022526;22345572;57247813;71309204;76973052;90811807;90817578 HMDB:(2151);PubChem class - food;Maconda:(CON00016);ZINC bio:(ZINC05224188);HSDB:(67-68-5);MeSH:(679);Plantcyc:(DMSO);PubMed;NORMAN:(NS00001957);Natural Products:(UNPD148866);PubChem:(679 75151 12206145 12264368 10103116 18594457 20151975 20719893 21022526 22345572 57247813 71309204 76973052 90811807 90817578);CHEBI:(28262);PubChem class - bio and metabolites;KEGG:(C11143);PubChem class - safety and toxic;PubChem class - drug;Biocyc:(DMSO) 252096366 79.0218658447266 NaN 0_unknown_ +MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA [M+H]+ 1 1 1 1 0.8955620660537568 -41.80129625695348 N/A 14.61674883848477 C2H6OS [M+H]+ IAZDPXIOMUYVGZ InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA CS(=O)C -0.600000024 679;75151;12206145;12264368;10103116;18594457;20151975;20719893;21022526;22345572;57247813;71309204;76973052;90811807;90817578 HMDB:(2151);PubChem class - food;Maconda:(CON00016);ZINC bio:(ZINC05224188);HSDB:(67-68-5);MeSH:(679);Plantcyc:(DMSO);PubMed;NORMAN:(NS00001957);Natural Products:(UNPD148866);PubChem:(679 75151 12206145 12264368 10103116 18594457 20151975 20719893 21022526 22345572 57247813 71309204 76973052 90811807 90817578);CHEBI:(28262);PubChem class - bio and metabolites;KEGG:(C11143);PubChem class - safety and toxic;PubChem class - drug;Biocyc:(DMSO) 252096366 79.0218811035156 NaN 0_unknown_ +MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA [M+H]+ 1 1 1 1 0.8320051497664187 -29.61962034150095 N/A 27.955014068311474 C2H6OS [M+H]+ IAZDPXIOMUYVGZ InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA CS(=O)C -0.600000024 679;75151;12206145;12264368;10103116;18594457;20151975;20719893;21022526;22345572;57247813;71309204;76973052;90811807;90817578 HMDB:(2151);PubChem class - food;Maconda:(CON00016);ZINC bio:(ZINC05224188);HSDB:(67-68-5);MeSH:(679);Plantcyc:(DMSO);PubMed;NORMAN:(NS00001957);Natural Products:(UNPD148866);PubChem:(679 75151 12206145 12264368 10103116 18594457 20151975 20719893 21022526 22345572 57247813 71309204 76973052 90811807 90817578);CHEBI:(28262);PubChem class - bio and metabolites;KEGG:(C11143);PubChem class - safety and toxic;PubChem class - drug;Biocyc:(DMSO) 252096366 79.0218887329102 NaN 0_unknown_
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cf_canopus_generic_msp_result.tsv Fri Feb 04 10:09:40 2022 +0000 @@ -0,0 +1,6 @@ +name adduct name molecularFormula adduct most specific class level 5 subclass class superclass all classifications +MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA [M+H]+ MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA C4H7NO [M+H]+ Carbonyl compounds Carbonyl compounds Organooxygen compounds Organic oxygen compounds Organic compounds; Organonitrogen compounds; Organooxygen compounds; Carbonyl compounds; Organic oxides; Hydrocarbon derivatives; Organopnictogen compounds; Organic oxygen compounds; Organic nitrogen compounds; Chemical entities +MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA [M+H]+ MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA C4H9N [M+H]+ Amines Amines Organonitrogen compounds Organic nitrogen compounds Organic compounds; Organonitrogen compounds; Amines; Hydrocarbon derivatives; Organopnictogen compounds; Organic nitrogen compounds; Chemical entities +MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA [M+H]+ MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA C2H6OS [M+H]+ Sulfoxides Sulfoxides Organosulfur compounds Organic compounds; Organosulfur compounds; Sulfoxides; Sulfinyl compounds; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities +MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA [M+H]+ MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA C2H6OS [M+H]+ Sulfonyls Sulfonyls Organosulfur compounds Organic compounds; Organosulfur compounds; Sulfonyls; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities +MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA [M+H]+ MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA C2H6OS [M+H]+ Sulfoxides Sulfoxides Organosulfur compounds Organic compounds; Organosulfur compounds; Sulfoxides; Sulfinyl compounds; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities