changeset 4:8fb51147d15e draft

"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit fdeefc696443bc3aadf08d4df7226cb6f91d0388"
author computational-metabolomics
date Fri, 04 Feb 2022 10:09:40 +0000
parents 4cbfd3d0a4c4
children 57c4e7421085
files sirius_csifingerid.py sirius_csifingerid.xml test-data/CCMSLIB00000578155_result.tsv test-data/cf_annotation_generic_msp_result.tsv test-data/cf_canopus_generic_msp_result.tsv
diffstat 5 files changed, 39 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/sirius_csifingerid.py	Wed Feb 02 17:29:46 2022 +0000
+++ b/sirius_csifingerid.py	Fri Feb 04 10:09:40 2022 +0000
@@ -390,16 +390,18 @@
                 ad = paramds[fn.split(os.sep)[-2]]['additional_details']
 
                 for line in reader:
-                    if 'rank' in line and \
-                            0 < int(rank_filter) < int(line['rank']):
+                    if 'rank' in line \
+                            and 0 < int(rank_filter) < int(line['rank']):
                         # filter out those annotations greater than rank filter
                         # If rank_filter is zero, no rank filtering is applied
                         continue
 
-                    if 'ConfidenceScore' in line \
-                            and 0 < int(confidence_filter) < int(line['rank']):
-                        # filter out those annotations greater than rank filter
-                        # If rank_filter is zero then skip
+                    if ('ConfidenceScore' in line
+                        and 0 < float(confidence_filter)
+                        and float(line['ConfidenceScore']) <
+                            float(confidence_filter)):
+                        # filter out those annotations that are less than
+                        # the confidence filter value
                         continue
                     line.update(ad)
 
@@ -414,13 +416,13 @@
         os.system(s2)
 
 
-concat_output('canopus_summary.tsv',
-              args.canopus_result_pth,
+concat_output('compound_identifications.tsv',
+              args.annotations_result_pth,
               args.rank_filter,
               args.confidence_filter,
               args.backwards_compatible)
-concat_output('compound_identifications.tsv',
-              args.annotations_result_pth,
+concat_output('canopus_summary.tsv',
+              args.canopus_result_pth,
               0,
               0,
               False)
--- a/sirius_csifingerid.xml	Wed Feb 02 17:29:46 2022 +0000
+++ b/sirius_csifingerid.xml	Fri Feb 04 10:09:40 2022 +0000
@@ -1,5 +1,5 @@
 <tool id="sirius_csifingerid" name="SIRIUS-CSI:FingerID"
-      version="4.9.8+galaxy0" profile="21.01">
+      version="4.9.8+galaxy0" profile="19.05">
     <description>is used to identify metabolites using single and
         tandem mass spectrometry</description>
     <requirements>
@@ -61,13 +61,13 @@
                label="MSP file (output from Create MSP tool)" />
         <param argument="--database" type="select"
                label="Select SIRIUS-CSI:FingerID Database" >
-            <option value="PubChem" >PubChem</option>
-            <option value="hmdb">HMDB</option>
-            <option value="kegg">KEGG</option>
-            <option value="knapsack">KNApSAcK</option>
-            <option value="biocyc">BioCyc</option>
-            <option selected="true" value="bio">Bio (all biological)</option>
-            <option  value="all">All use all databases</option>
+            <option value="PUBCHEM" >PubChem</option>
+            <option value="HMDB">HMDB</option>
+            <option value="KEGG">KEGG</option>
+            <option value="KNAPSACK">KNApSAcK</option>
+            <option value="BIOCYC">BioCyc</option>
+            <option selected="true" value="BIO">Bio (all biological)</option>
+            <option value="ALL">All (use all databases)</option>
         </param>
         <param argument="--ppm_max" type="integer" value="10" min="0"
                label="Mass deviation of the fragment peaks in ppm" />
@@ -143,13 +143,13 @@
                label="Only show the top ranked annotations less than or equal
                       to this value (default to show all annotations)"/>
 
-        <param argument="--confidence_filter" type="integer" value="0"
+        <param argument="--confidence_filter" type="float" value="0"
                label="Only show annotations greater than or equal
                       to this value (default to show all annotations)"/>
 
         <param argument="--backwards_compatible" type="boolean" checked="false"
                label="Makes the outputs compatible with annotation workflows that used the old output from
-                      SIRIUS:CSI:FingerID v4.0.1"/>
+                      SIRIUS-CSI:FingerID v4.0.1"/>
     </inputs>
     <outputs>
         <data name="canopus_results" format="tsv" label="${tool.name} on ${on_string}: CANOPUS"
@@ -209,6 +209,13 @@
             <output name="annotation_results" file="bc_annotation_CCMSLIB00000578155_result.tsv"/>
             <output name="canopus_results" file="bc_canopus_CCMSLIB00000578155_result.tsv"/>
         </test>
+        <test>
+            <!-- Test confidence filter  -->
+            <param name="input" value="generic.msp" ftype="msp"/>
+            <param name="confidence_filter" value="0.7"/>
+            <output name="annotation_results" file="cf_annotation_generic_msp_result.tsv"/>
+            <output name="canopus_results" file="cf_canopus_generic_msp_result.tsv"/>
+        </test>
     </tests>
     <help>
 ----------------
--- a/test-data/CCMSLIB00000578155_result.tsv	Wed Feb 02 17:29:46 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-name	adduct	inchikey2D	inchi	molecularFormula	rank	score	name	smiles	xlogp	pubchemids	links
-D-GLUCOSE-6-PHOSPHATE	[M-H]-	NBSCHQHZLSJFNQ	InChI=1S/C6H13O9P/c7-3-2(1-14-16(11,12)13)15-6(10)5(9)4(3)8/h2-10H,1H2,(H2,11,12,13)	C6H13O9P	1	-2956.17597	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(C(O)O1)O)O)O)OP(=O)(O)O		208;5958;65127;439198;439284;439404;439427;440100;447096;449526;4178491;4459709;9817215;9859975;10038266;10332946;10422797;10422798;10848963;11499884;11536233;11651816;11651817;11701643;12314997;12598269;16219407;21604864;21604865;23421197;23421199;23421200;24802166;25200774;25244236;42609823;44589902;44629605;46936284;51351673;51351674;59660207;59660208;66804219;70828590;71048769;72200063;89530481;89533633;90087729;90657928;92043642;92144442;92331699;92450038;100983220;101251820;102072969;124302956;124303605	HMDB:(3498);KNApSAcK:(7307);Natural Products:(UNPD119019 UNPD208877);CHEBI:(47944 136602 41076 4141 17665 134068 91004 61567 61667 4170 61548 58735 17719 58225 60332 58247 48066);KEGG:(C02962 C03735 C00275 C02965 C01172 C00092 C00668 C01113);Plantcyc:(MANNOSE-6P CPD-15711 CPD-15712 D-HEXOSE-6-PHOSPHATE GLC-6-P ALPHA-GLC-6-P CPD-1241);YMDB:(2311);Biocyc:(CPD-15712 CPD-1241)
-D-GLUCOSE-6-PHOSPHATE	[M-H]-	HXXFSFRBOHSIMQ	InChI=1S/C6H13O9P/c7-1-2-3(8)4(9)5(10)6(14-2)15-16(11,12)13/h2-10H,1H2,(H2,11,12,13)	C6H13O9P	2	-2968.893	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(C(O1)OP(=O)(O)O)O)O)O)O		466;65533;122250;123912;439165;439279;439426;439995;644175;1549075;1549076;3034296;3246168;3551220;5702593;6560208;6560209;7091266;7098639;10084035;11108064;11299931;11536234;11557960;11586967;11637475;11701642;12773693;12773694;15720053;20706002;21120286;22298591;23421196;23421198;23724605;23724607;24802153;24802168;25134172;25244208;25245607;26470622;26470623;26470920;26470921;26470922;40467866;40467867;40467868;40473131;40473132;42609824;44224049;45109780;46173227;46173228;46878478;51397481;57349329;57466719;57616986;57616987;58434201;59383287;59973641;59973642;59985133;60023647;67062884;67062905;67062913;67062918;67794900;68298161;68937634;70124502;70837719;71122101;71728461;88462985;90472756;91265893;91658980;101503810;101747832;101747833;121494054;122545953;125293590;125293595;125293596;125293598	HMDB:(62705);KNApSAcK:(7389);Natural Products:(UNPD85752 UNPD57928 UNPD186485);CHEBI:(16077 17973 75522 24588 53072 58601 57684 60389 58336 60465 53025 16326 80181 58908 18205 16218 58409 57629);KEGG:(C15924 C15926 C01171 C03384 C00636 C00446 C01002 C00103 C00663);Plantcyc:(CPD-9828 GALACTOSE-1P GLC-1-P MANNOSE-1P CPDQT-4 CPD-448 CPD4FS-5);YMDB:(970);Biocyc:(CPD4FS-5)
-D-GLUCOSE-6-PHOSPHATE	[M-H]-	BGWGXPAPYGQALX	InChI=1S/C6H13O9P/c7-2-6(10)5(9)4(8)3(15-6)1-14-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13)	C6H13O9P	3	-2996.82333	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(CO)(O)O1)O)O)OP(=O)(O)O		719;124155;439160;439396;440641;440970;444848;5083448;9543488;15648788;16760431;20843252;21604862;21604863;23421195;24802142;25201714;25245410;42609822;46174048;46878483;52916945;86308139;91746169;92024282;102322321;122174030;124300900;124350439;124524514;124579643	HMDB:(6873);KNApSAcK:(7305);Natural Products:(UNPD153056);CHEBI:(57634 4251 81499 61553 58695 16084 6307 45804 47946 58926 61527);KEGG:(C06312 C18096 C01097 C05345 C00085);Plantcyc:(TAGATOSE-6-PHOSPHATE FRUCTOSE-6P);Biocyc:(L-TAGATOSE-6-PHOSPHATE)
-D-GLUCOSE-6-PHOSPHATE	[M-H]-	PMTUDJVZIGZBIX	InChI=1S/C6H13O9P/c7-1-3-4(9)5(10)6(2-8,14-3)15-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13)	C6H13O9P	4	-2999.57091	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(CO)(O1)OP(=O)(O)O)O)O)O		193537;5176477;6398638;15703397;16069990;21126112;21126113;57357663;99639213;124202606	HMDB:(6800);CHEBI:(27884 57267 12350);KEGG:(C03267);YMDB:(878);Biocyc:(FRUCTOSE-2-PHOSPHATE)
-D-GLUCOSE-6-PHOSPHATE	[M-H]-	RHKKZBWRNHGJEZ	InChI=1S/C6H13O9P/c7-1-3-4(8)5(9)6(10,15-3)2-14-16(11,12)13/h3-5,7-10H,1-2H2,(H2,11,12,13)	C6H13O9P	5	-3000.17545	D-GLUCOSE-6-PHOSPHATE	C(C1C(C(C(COP(=O)(O)O)(O)O1)O)O)O		717;439394;10400369;21627880;23421194;25244216;51397484;52916944;90658050;90658051;90659357;90659358;92209483;97041850	HMDB:(1076);KNApSAcK:(19676);CHEBI:(37515 58674);KEGG:(C01094);Plantcyc:(FRU1P);Biocyc:(CPD-16154 CPD-16158 CPD-16159)
-D-GLUCOSE-6-PHOSPHATE	[M-H]-	INAPMGSXUVUWAF	InChI=1S/C6H13O9P/c7-1-2(8)4(10)6(5(11)3(1)9)15-16(12,13)14/h1-11H,(H2,12,13,14)	C6H13O9P	6	-3061.86763	D-GLUCOSE-6-PHOSPHATE	C1(C(C(C(C(C1O)O)OP(=O)(O)O)O)O)O		9;107737;160886;161368;440043;440194;4449629;5288642;5288700;6323385;7098643;10659045;13072112;18654477;25200523;25200860;25203035;35027167;53924828;59824613;59824614;59824615;59824616;101661021;121400595;121403401	HMDB:(6814);KNApSAcK:(7483);Natural Products:(UNPD107543 UNPD92136 UNPD189294);CHEBI:(58469 18169 62383 37493 18384 58433 18297 64841 58401 64838 84142 84141);KEGG:(C03546 C06155 C01177 C04006);Plantcyc:(1-L-MYO-INOSITOL-1-P D-MYO-INOSITOL-1-MONOPHOSPHATE CPD-6701 CPD-6702 CPD-6746 CPD-9887 D-MYO-INOSITOL-4-PHOSPHATE);YMDB:(2322);Biocyc:(D-MYO-INOSITOL-4-PHOSPHATE CPD-6701 CPD-6702 CPD-6746)
-D-GLUCOSE-6-PHOSPHATE	[M-H]-	GSXOAOHZAIYLCY	InChI=1S/C6H13O9P/c7-1-3(8)5(10)6(11)4(9)2-15-16(12,13)14/h4-7,9-11H,1-2H2,(H2,12,13,14)	C6H13O9P	7	-3108.21629	D-GLUCOSE-6-PHOSPHATE	C(C(=O)C(C(C(COP(=O)(O)O)O)O)O)O		603;69507;151197;5459902;5459952;6602428;20111689;20111690;21114947;21872891;23615358;40467872;40467873;46943428;50909805;87615581	HMDB:(124);KNApSAcK:(19683);Natural Products:(UNPD94448);CHEBI:(57579 61519 134284 15946 15845 61559 47947 134283);Plantcyc:(D-ALLULOSE-6-PHOSPHATE);YMDB:(78);Biocyc:(CPD-15828 CPD-15826 D-ALLULOSE-6-PHOSPHATE)
-D-GLUCOSE-6-PHOSPHATE	[M-H]-	ZKLLSNQJRLJIGT	InChI=1S/C6H13O9P/c7-1-3(8)5(10)6(11)4(9)2-15-16(12,13)14/h3,5-8,10-11H,1-2H2,(H2,12,13,14)	C6H13O9P	8	-3116.86489	D-GLUCOSE-6-PHOSPHATE	C(C(C(C(C(=O)COP(=O)(O)O)O)O)O)O		481;65246;151033;439837;440076;6101730;11129032;11737049;14844438;20111955;21145035;23615304;54551858;54551860;54551861;54551863;91010818	HMDB:(60467);KNApSAcK:(19630);CHEBI:(38342 218 18105);KEGG:(C03654 C02888);YMDB:(655);Biocyc:(CPD-15970 CPD0-1116 CPD-531)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cf_annotation_generic_msp_result.tsv	Fri Feb 04 10:09:40 2022 +0000
@@ -0,0 +1,4 @@
+name	adduct	rank	formulaRank	#adducts	#predictedFPs	ConfidenceScore	CSI:FingerIDScore	ZodiacScore	SiriusScore	molecularFormula	adduct	InChIkey2D	InChI	name	smiles	xlogp	pubchemids	links	dbflags	ionMass	retentionTimeInSeconds	id
+MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	1	1	1	1	0.8516968844490043	-30.428891568077617	N/A	19.249416937845428	C2H6OS	[M+H]+	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C	-0.600000024	679;75151;12206145;12264368;10103116;18594457;20151975;20719893;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);PubChem class - food;Maconda:(CON00016);ZINC bio:(ZINC05224188);HSDB:(67-68-5);MeSH:(679);Plantcyc:(DMSO);PubMed;NORMAN:(NS00001957);Natural Products:(UNPD148866);PubChem:(679 75151 12206145 12264368 10103116 18594457 20151975 20719893 21022526 22345572 57247813 71309204 76973052 90811807 90817578);CHEBI:(28262);PubChem class - bio and metabolites;KEGG:(C11143);PubChem class - safety and toxic;PubChem class - drug;Biocyc:(DMSO)	252096366	79.0218658447266	NaN	0_unknown_
+MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	1	1	1	1	0.8955620660537568	-41.80129625695348	N/A	14.61674883848477	C2H6OS	[M+H]+	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C	-0.600000024	679;75151;12206145;12264368;10103116;18594457;20151975;20719893;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);PubChem class - food;Maconda:(CON00016);ZINC bio:(ZINC05224188);HSDB:(67-68-5);MeSH:(679);Plantcyc:(DMSO);PubMed;NORMAN:(NS00001957);Natural Products:(UNPD148866);PubChem:(679 75151 12206145 12264368 10103116 18594457 20151975 20719893 21022526 22345572 57247813 71309204 76973052 90811807 90817578);CHEBI:(28262);PubChem class - bio and metabolites;KEGG:(C11143);PubChem class - safety and toxic;PubChem class - drug;Biocyc:(DMSO)	252096366	79.0218811035156	NaN	0_unknown_
+MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	1	1	1	1	0.8320051497664187	-29.61962034150095	N/A	27.955014068311474	C2H6OS	[M+H]+	IAZDPXIOMUYVGZ	InChI=1S/C2H6OS/c1-4(2)3/h1-2H3	MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	CS(=O)C	-0.600000024	679;75151;12206145;12264368;10103116;18594457;20151975;20719893;21022526;22345572;57247813;71309204;76973052;90811807;90817578	HMDB:(2151);PubChem class - food;Maconda:(CON00016);ZINC bio:(ZINC05224188);HSDB:(67-68-5);MeSH:(679);Plantcyc:(DMSO);PubMed;NORMAN:(NS00001957);Natural Products:(UNPD148866);PubChem:(679 75151 12206145 12264368 10103116 18594457 20151975 20719893 21022526 22345572 57247813 71309204 76973052 90811807 90817578);CHEBI:(28262);PubChem class - bio and metabolites;KEGG:(C11143);PubChem class - safety and toxic;PubChem class - drug;Biocyc:(DMSO)	252096366	79.0218887329102	NaN	0_unknown_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cf_canopus_generic_msp_result.tsv	Fri Feb 04 10:09:40 2022 +0000
@@ -0,0 +1,6 @@
+name	adduct	name	molecularFormula	adduct	most specific class	level 5	subclass	class	superclass	all classifications
+MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	[M+H]+	MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA	C4H7NO	[M+H]+	Carbonyl compounds		Carbonyl compounds	Organooxygen compounds	Organic oxygen compounds	Organic compounds; Organonitrogen compounds; Organooxygen compounds; Carbonyl compounds; Organic oxides; Hydrocarbon derivatives; Organopnictogen compounds; Organic oxygen compounds; Organic nitrogen compounds; Chemical entities
+MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	[M+H]+	MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA	C4H9N	[M+H]+	Amines		Amines	Organonitrogen compounds	Organic nitrogen compounds	Organic compounds; Organonitrogen compounds; Amines; Hydrocarbon derivatives; Organopnictogen compounds; Organic nitrogen compounds; Chemical entities
+MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA	C2H6OS	[M+H]+	Sulfoxides			Sulfoxides	Organosulfur compounds	Organic compounds; Organosulfur compounds; Sulfoxides; Sulfinyl compounds; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
+MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA	C2H6OS	[M+H]+	Sulfonyls			Sulfonyls	Organosulfur compounds	Organic compounds; Organosulfur compounds; Sulfonyls; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities
+MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	[M+H]+	MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA	C2H6OS	[M+H]+	Sulfoxides			Sulfoxides	Organosulfur compounds	Organic compounds; Organosulfur compounds; Sulfoxides; Sulfinyl compounds; Organic oxides; Hydrocarbon derivatives; Organic oxygen compounds; Chemical entities