changeset 5:e7eff0c9baa3 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_msfinder commit c6bc00f50c232c1c7cbb2e6f838c14a0c1c20e05
author recetox
date Wed, 24 Jan 2024 13:26:32 +0000
parents ae66b58846cd
children
files macros.xml recetox_msfinder.xml test-data/log_smiles.smi test-data/test2_out.msp
diffstat 4 files changed, 166 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Fri Dec 01 09:23:21 2023 +0000
+++ b/macros.xml	Wed Jan 24 13:26:32 2024 +0000
@@ -2,7 +2,7 @@
     <token name="@TOOL_VERSION@">v3.5.2</token>
     <xml name="requirements">
         <requirements>
-            <container type="docker">recetox/recetox-msfinder:v3.5.2-rcx1</container>
+            <container type="docker">recetox/recetox-msfinder:v3.5.2-rcx2</container>
         </requirements>
     </xml>
     <xml name="creator">
@@ -31,7 +31,7 @@
         <param type="float" value="0.01" name="Ms2Tolerance" label="Ms2Tolerance" help="The mass tolerance used for fragment peak matching and annotation." />
         <param name="IsTmsMeoxDerivative" type="boolean" checked="False" truevalue="True" falsevalue="False" label="IsTmsMeoxDerivative"
             help="Check TMS-MeOX derivative to use EIMS database; otherwise, MSMS database is used." />
-        <param type="integer" value="1" name="RelativeAbundanceCutOff" label="RelativeAbundanceCutOff"
+        <param type="integer" value="1" min="0" max="100" name="RelativeAbundanceCutOff" label="RelativeAbundanceCutOff"
             help="Remove peaks with less than specified % relative intensity from the spectra." />
         <param type="float" value="0.001" name="Ms1Tolerance" label="Ms1Tolerance" help="The precursor m/z tolerance to generate formula candidates." />
         <param type="select" name="MassToleranceType" label="MassToleranceType" help="Da stands for Daltons (amu), and Ppm stands for parts per million">
@@ -39,20 +39,38 @@
             <option value="Ppm">Ppm</option>
         </param>
         <param type="integer" value="-1" name="StructurePredictionTimeOut" label="StructurePredictionTimeOut" help="Time out parameter, value of -1 means no timeout" />
-        <param name="LewisAndSeniorCheck" type="boolean" checked="True" truevalue="True" falsevalue="False" label="LewisAndSeniorCheck"
-            help="Generate formula candidates that match the valence rules of formula elements." />
+        <param name="LewisAndSeniorCheck" label="LEWIS and SENIOR rules check" type="boolean" truevalue="True" falsevalue="False" checked="True"
+            help="Generate formula candidates that match the valence rules of formula elements. For an explanation of those rules see 'Seven Golden Rules for heuristic 
+            filtering of molecular formulas obtained by accurate mass spectrometry'" />
+        <param name="element_ratio_check" type="select" label="Element ratio check"
+            help="Generate formula candidates that satisfy every element ratio restriction (e.g. the H/C ratio must be between 0 and 3.33 for the Common range (99.7%) restriction).">
+            <option value="CommonRange=TRUE" selected="true">Common Range</option>
+            <option value="ExtendedRange=TRUE">Extended Range</option>
+            <option value="ExtremeRange=TRUE">No restriction</option>
+        </param>
+        <param name="TreeDepth" type="integer" min="1" max="3" value="2" label="Tree Depth"
+            help="The limitation of in silico cleavages, i.e. if the user sets '2', the MS-FINDER program generates fragments up to the second level of product ions."/>
+        <param name="IsotopicAbundanceTolerance" type="integer" min="0" max="100" value="20" label="Isotopic Ratio Tolerance"
+            help="Calculate the isotopic score. The tolerance should be utilized as the sigma value for the Gaussian scoring."/>
+        <param name="ElementProbabilityCheck" label="Element probability check" type="boolean" truevalue="True" falsevalue="False" checked="True"
+            help="Generate formula candidates that satisfy the heuristic rules as described in the Seven Golden Rules paper. For example, if a formula 
+            candidate contains the following element counts, i.e. NOPS all > 1, the element counts of N, O, P, and S should be less than 9, 19, 3, and 2, respectively."/>
+        <param name="IsUseEiFragmentDB" label="Use EI fragment database" type="boolean" truevalue="True" falsevalue="False" checked="False"
+            help="Utilize the fragment ion library for EI-MS spectral mining"/>
     </xml>
-    <!-- Element Checks <xml name="element_check_parameter">
-        <param name="Ocheck" type="boolean" checked="True" truevalue="True" falsevalue="False" label="Ocheck" />
-        <param name="Ncheck" type="boolean" checked="True" truevalue="True" falsevalue="False" label="Ncheck" />
-        <param name="Pcheck" type="boolean" checked="True" truevalue="True" falsevalue="False" label="Pcheck" />
-        <param name="Scheck" type="boolean" checked="True" truevalue="True" falsevalue="False" label="Scheck" />
-        <param name="Fcheck" type="boolean" checked="True" truevalue="True" falsevalue="False" label="Fcheck" />
-        <param name="ClCheck" type="boolean" checked="True" truevalue="True" falsevalue="False" label="ClCheck" />
-        <param name="BrCheck" type="boolean" checked="True" truevalue="True" falsevalue="False" label="BrCheck" />
-        <param name="Icheck" type="boolean" checked="True" truevalue="True" falsevalue="False" label="Icheck" />
-        <param name="SiCheck" type="boolean" checked="True" truevalue="True" falsevalue="False" label="SiCheck" />
-    </xml>  -->
+    <!-- Options of the "element_selection_option" multi-select; every chosen value is appended as its own KEY=True line to MSFINDER.INI. -->
+    <xml name="element_select">
+        <option value="Ocheck=True">Oxygen check</option>
+        <option value="Ncheck=True">Nitrogen check</option>
+        <option value="Pcheck=True">Phosphorus check</option>
+        <option value="Scheck=True">Sulfur check</option>
+        <option value="Fcheck=True">Fluorine check</option>
+        <option value="ClCheck=True">Chlorine check</option>
+        <option value="BrCheck=True">Bromine check</option>
+        <option value="Icheck=True">Iodine check</option>
+        <option value="SiCheck=True">Silicon check</option>
+    </xml>
+
     <xml name="output">
         <collection type="list" format="txt" name="output" label="${tool.name} on ${on_string}">
             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.smi" directory="output" ext="smi" />
@@ -74,7 +92,7 @@
         FORMULA, PRECURSORMZ and SMILES (or INCHI) metadata entries are required to be present in the input mass spectral library file.
         Each spectrum has to be separated by a single blank line.
 
-        For detailed documentation of the tool please visit https://mtbinfo-team.github.io/mtbinfo.github.io/MS-FINDER/tutorial
+        For detailed documentation of the tool please visit https://systemsomicslab.github.io/mtbinfo.github.io/MS-FINDER/tutorial
     ]]>
     </token>
 
--- a/recetox_msfinder.xml	Fri Dec 01 09:23:21 2023 +0000
+++ b/recetox_msfinder.xml	Wed Jan 24 13:26:32 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="recetox_msfinder" name="RECETOX MsFinder" version="@TOOL_VERSION@+galaxy3">
+<tool id="recetox_msfinder" name="RECETOX MsFinder" version="@TOOL_VERSION@+galaxy4">
     <description>Annotation of fragment peaks in mass spectral libraries.</description>
     <macros>
         <import>macros.xml</import>
@@ -7,13 +7,13 @@
     <expand macro="creator" />
 
     <expand macro="requirements" />
-    <command>
+    <command detect_errors="exit_code">
         <![CDATA[
         mkdir output;
         sh ${write_param};
-        mono /MsFinder/MsFinder/bin/Debug/MsfinderConsoleApp.exe annotate -i "$input_data" -m 'MSFINDER.INI' -o 'output/result.msp';
+        mono /MsFinder/MsFinder/bin/Debug/MsfinderConsoleApp.exe annotate -i '$input_data' -m 'MSFINDER.INI' -o 'output/result.msp';
+        [ -f output/log_smiles.smi ] && sed -i '1s/^/smiles\n/' output/log_smiles.smi;
         [ -s output/result.msp ] || rm output/result.msp;
-
     ]]>
     </command>
 
@@ -36,16 +36,25 @@
 			"IsUseCcsForFilteringCandidates=True" \
 			"CcsToleranceForStructureElucidation=10" \
 			"CanExcuteMS2AdductSearch=False" \
-            "IsUseXlogpPrediction=False" >MSFINDER.INI
+            "IsUseXlogpPrediction=False" \
+            "ElementProbabilityCheck=$ElementProbabilityCheck" \
+            "IsotopicAbundanceTolerance=$IsotopicAbundanceTolerance" \
+            "TreeDepth=$TreeDepth" \
+            "$element_ratio_check" \
+            "IsUseEiFragmentDB=$IsUseEiFragmentDB" >MSFINDER.INI
+            ## Append one KEY=True line per selected element; the filter drops the "None" that str() yields when no element is selected.
+            #set element_selected = "".join([str($element) + "\n" for $element in str($element_selection_option).split(",") if str($element) not in ("", "None")])
+            printf "%s" "$element_selected" >> MSFINDER.INI
         </configfile>
     </configfiles>
 
     <inputs>
         <expand macro="input" />
         <expand macro="parameter" />
-        <!-- <section name="element_selection" title="Element Selection" expanded="true" help="Generate formula candidates that just contain the elements selected by the users.">
-            <expand macro="element_check_parameter" />
-        </section> -->
+        <param name="element_selection_option" type="select" multiple="true" optional="true" label="Element selection"
+            help="Generate formula candidates that just contain the elements selected by the users. Check 'IsTmsMeoxDerivative' if you want to annotate EI-MS spectra.">
+            <expand macro="element_select" />
+        </param>
     </inputs>
 
     <outputs>
@@ -61,6 +70,15 @@
             <!-- This is set to 16 since msfinder sometimes places double bonds between certain atoms and sometimes not - there are 16 such cases in this file. -->
         </test>
         <test>
+            <param name="input_data" value="test.msp" ftype="msp" />
+            <param name="element_ratio_check" value="ExtendedRange=TRUE"/>
+            <param name="TreeDepth" value="3"/>
+            <param name="element_selection_option" value="Ocheck=True,Ncheck=True,Pcheck=True,Scheck=True,Fcheck=True,ClCheck=True,BrCheck=True,Icheck=True,SiCheck=True"/>
+            <output_collection name="output" type="list">
+                <element name="result" file="test2_out.msp" lines_diff="24"/>
+            </output_collection>
+        </test>
+        <test>
             <param name="input_data" value="test_log.msp" ftype="msp" />
             <output_collection name="output" type="list">
                 <element name="log_smiles" file="log_smiles.smi" />
--- a/test-data/log_smiles.smi	Fri Dec 01 09:23:21 2023 +0000
+++ b/test-data/log_smiles.smi	Wed Jan 24 13:26:32 2024 +0000
@@ -1,1 +1,2 @@
+smiles
 Cl[Si]1Oc2ccccc2O1.[C-]#[O+].[C-]#[O+].[CH]1C=CC=C1.[Fe]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test2_out.msp	Wed Jan 24 13:26:32 2024 +0000
@@ -0,0 +1,105 @@
+NAME: Acetylserotonin_2TMS isomer 2
+SCANNUMBER: -1
+RETENTIONTIME: -1
+RETENTIONINDEX: 2520.736
+PRECURSORMZ: 362.18381
+PRECURSORTYPE: [M]+
+IONMODE: Positive
+SPECTRUMTYPE: Centroid
+FORMULA: C18H30N2O2Si2
+INCHIKEY: BITJWWNGDAOCJT-UHFFFAOYSA-N
+INCHI: 
+SMILES: C[Si](C)(C)Oc1cc2c(cc1)n(cc2CC\N=C(/C)O)[Si](C)(C)C
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+COLLISIONENERGY: 70
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+INSTRUMENTTYPE: GC-EI-Orbitrap
+IONIZATION: 
+LICENSE: CC BY-NC
+COMMENT: 
+Num Peaks: 84
+73.04681	6882	"Theoretical m/z 73.046802, Mass diff 0 (0.11 ppm), SMILES C[Si](C)C, Annotation [C3H10Si-H]+, Rule of HR True"
+74.04642	3441
+76.01821	5005	"Theoretical m/z 76.018724, Mass diff 0 (0 ppm), Formula C5H2N"
+77.02173	4634
+78.03387	2401
+78.91781	4925
+79.04164	36910	"Theoretical m/z 79.042199, Mass diff 0 (0 ppm), Formula C5H5N"
+80.91575	12768
+81.06992	1588	"Theoretical m/z 81.070425, Mass diff 0 (0 ppm), Formula C6H9"
+89.03854	1372	"Theoretical m/z 89.041721, Mass diff 0.003 (35.73 ppm), SMILES O[Si](C)(C)C, Annotation [C3H10OSi-H]+, Rule of HR True"
+91.05424	1324	"Theoretical m/z 91.057371, Mass diff 0.003 (34.39 ppm), SMILES O[Si](C)(C)C, Annotation [C3H10OSi+H]+, Rule of HR True"
+92.06213	3591	"Theoretical m/z 92.0626, Mass diff 0 (0 ppm), Formula C7H8"
+93.03666	8343	"Theoretical m/z 93.03404, Mass diff -0.003 (0 ppm), Formula C6H5O"
+94.93144	2656
+95.04466	8498	"Theoretical m/z 95.04969, Mass diff 0.004 (0 ppm), Formula C6H7O"
+95.93922	1565
+103.05418	2844	"Theoretical m/z 103.054775, Mass diff 0 (0 ppm), Formula C8H7"
+104.06204	1384
+105.06992	2211	"Theoretical m/z 105.070425, Mass diff 0 (0 ppm), Formula C8H9"
+106.89473	3073
+106.94908	3532
+108.94706	3679	"Theoretical m/z 108.956593, Mass diff 0.009 (0 ppm), Formula C3HOSi2"
+111.04412	1592	"Theoretical m/z 111.044604, Mass diff 0 (0 ppm), Formula C6H7O2"
+113.03857	1585	"Theoretical m/z 113.039125, Mass diff 0 (0 ppm), Formula C9H5"
+115.05422	7216	"Theoretical m/z 115.054775, Mass diff 0 (0 ppm), Formula C9H7"
+120.05695	3247	"Theoretical m/z 120.057515, Mass diff 0 (0 ppm), Formula C8H8O"
+120.94707	4091	"Theoretical m/z 120.956593, Mass diff 0.009 (0 ppm), Formula C4HOSi2"
+120.96468	3380	"Theoretical m/z 120.967826, Mass diff 0.003 (0 ppm), Formula C3HN2Si2"
+122.06804	4192	"Theoretical m/z 122.073165, Mass diff 0.005 (0 ppm), Formula C8H10O"
+123.93423	1315	"Theoretical m/z 123.943682, Mass diff 0.009 (0 ppm), Formula C3O2Si2"
+124.94194	4725	"Theoretical m/z 124.951507, Mass diff 0.009 (0 ppm), Formula C3HO2Si2"
+127.02392	3504	"Theoretical m/z 127.021531, Mass diff -0.003 (0 ppm), Formula C5H7O2Si"
+129.06982	2726	"Theoretical m/z 129.070425, Mass diff 0 (0 ppm), Formula C10H9"
+130.06831	4067	"Theoretical m/z 130.065128, Mass diff 0.003 (24.46 ppm), SMILES C1=CC=C2C(=C1)NCC2C, Annotation [C9H9N-H]+, Rule of HR True"
+141.06976	3435	"Theoretical m/z 141.070425, Mass diff 0 (0 ppm), Formula C11H9"
+145.06465	1876	"Theoretical m/z 145.06534, Mass diff 0 (0 ppm), Formula C10H9O"
+149.06238	3354	"Theoretical m/z 149.060255, Mass diff -0.003 (0 ppm), Formula C9H9O2"
+152.06189	6328	"Theoretical m/z 152.071154, Mass diff 0.009 (0 ppm), Formula C8H10NO2"
+164.94904	1697	"Theoretical m/z 164.957655, Mass diff 0.008 (0 ppm), Formula C4HN2O2Si2"
+167.05533	3051	"Theoretical m/z 167.060378, Mass diff 0.005 (30.22 ppm), SMILES N(=C)CCC2CNC1=CC=CC=C12, Annotation [C11H12N2-5H]+, Rule of HR True"
+170.0343	2446	"Theoretical m/z 170.042052, Mass diff 0.008 (45.59 ppm), SMILES C1=CC=C2C(=C1)CCN2[Si](C)C, Annotation [C10H13NSi-5H]+, Rule of HR True"
+171.95171	10548	"Theoretical m/z 171.943682, Mass diff -0.009 (0 ppm), Formula C7O2Si2"
+172.95517	1555
+173.94968	10065	"Theoretical m/z 173.946756, Mass diff -0.003 (0 ppm), Formula C6NO2Si2"
+174.95305	1284
+178.07762	1953	"Theoretical m/z 178.07825, Mass diff 0 (0 ppm), Formula C14H10"
+183.08034	6206	"Theoretical m/z 183.08099, Mass diff 0 (0 ppm), Formula C13H11O"
+184.08812	6315	"Theoretical m/z 184.097778, Mass diff 0.009 (0 ppm), Formula C8H18NSi2"
+185.09143	2274
+192.98021	1819	"Theoretical m/z 192.985814, Mass diff 0.005 (0 ppm), Formula C10HN2OSi"
+194.07246	1646	"Theoretical m/z 194.073165, Mass diff 0 (0 ppm), Formula C14H10O"
+197.97484	2121	"Theoretical m/z 197.980001, Mass diff 0.005 (0 ppm), Formula C13NSi"
+198.97534	5042	"Theoretical m/z 198.978391, Mass diff 0.003 (0 ppm), Formula C8H3N2OSi2"
+204.08408	1545	"Theoretical m/z 204.083912, Mass diff 0 (0.83 ppm), SMILES O(C1=CC=C2NCCC2(=C1))[Si](C)(C)C, Annotation [C11H15NOSi-H]+, Rule of HR True"
+208.03126	3799	"Theoretical m/z 208.025007, Mass diff -0.007 (0 ppm), Formula C8H10NO2Si2"
+209.01154	5448	"Theoretical m/z 209.013973, Mass diff 0.002 (0 ppm), Formula C15HN2"
+210.99068	3675	"Theoretical m/z 210.996379, Mass diff 0.005 (0 ppm), Formula C10H3N2O2Si"
+211.06046	5392	"Theoretical m/z 211.061058, Mass diff 0 (0 ppm), Formula C9H15O2Si2"
+214.10466	3080	"Theoretical m/z 214.104654, Mass diff 0 (0.03 ppm), SMILES C1=CC=C2C(=C1)C(CN2[Si](C)(C)C)CC, Annotation [C13H19NSi-3H]+, Rule of HR True"
+220.07869	6462	"Theoretical m/z 220.07938, Mass diff 0 (0 ppm), Formula C11H14NO2Si"
+226.0417	4848	"Theoretical m/z 226.050827, Mass diff 0.009 (0 ppm), Formula C12H12NSi2"
+227.02208	14828	"Theoretical m/z 227.024538, Mass diff 0.002 (0 ppm), Formula C15H3N2O"
+230.00099	2388	"Theoretical m/z 230.003074, Mass diff 0.002 (0 ppm), Formula C18N"
+230.09961	2241	"Theoretical m/z 230.099573, Mass diff 0 (0.16 ppm), SMILES O(C1=CC=C2NCC(C2(=C1))CC)[Si](C)(C)C, Annotation [C13H19NOSi-3H]+, Rule of HR True"
+240.94661	1761	"Theoretical m/z 240.956593, Mass diff 0.009 (0 ppm), Formula C14HOSi2"
+254.96269	2484	"Theoretical m/z 254.972243, Mass diff 0.009 (0 ppm), Formula C15H3OSi2"
+257.06641	1341	"Theoretical m/z 257.074096, Mass diff 0.008 (29.9 ppm), SMILES OC(=NCCC2CNC1=CC=C(O[Si]C)C=C12)C, Annotation [C13H18N2O2Si-5H]+, Rule of HR True"
+273.14221	1338	"Theoretical m/z 273.141773, Mass diff 0 (1.6 ppm), SMILES OC(=NCCC2CN(C1=CC=CC=C12)[Si](C)(C)C)C, Annotation [C15H22N2OSi-H]+, Rule of HR True"
+288.12326	15718	"Theoretical m/z 288.123437, Mass diff 0 (0.62 ppm), SMILES O(C1=CC=C2C(=C1)C(CN2[Si](C)(C)C)CC)[Si](C)C, Annotation [C15H25NOSi2-3H]+, Rule of HR True"
+289.12668	5715
+290.13898	125444	"Theoretical m/z 290.139087, Mass diff 0 (0.37 ppm), SMILES O(C1=CC=C2C(=C1)C(CN2[Si](C)(C)C)C)[Si](C)(C)C, Annotation [C15H25NOSi2-H]+, Rule of HR True"
+291.14282	16150
+292.1355	12595
+293.13925	1290	"Theoretical m/z 293.139308, Mass diff 0 (0 ppm), Formula C15H25O2Si2"
+302.1391	2508	"Theoretical m/z 302.139642, Mass diff 0 (0 ppm), Formula C16H24NOSi2"
+303.14676	109060	"Theoretical m/z 303.146918, Mass diff 0 (0.52 ppm), SMILES O(C1=CC=C2C(=C1)C(CN2[Si](C)(C)C)CC)[Si](C)(C)C, Annotation [C16H27NOSi2-2H]+, Rule of HR False"
+304.1492	19700
+305.143	7792
+306.13388	4148	"Theoretical m/z 306.134557, Mass diff 0 (0 ppm), Formula C15H24NO2Si2"
+328.98126	2010
+345.1445	7128	"Theoretical m/z 345.145456, Mass diff 0 (0 ppm), Formula C17H25N2O2Si2"
+360.16812	2877
+362.18381	10542	"Theoretical m/z 362.184022, Mass diff 0 (0.58 ppm), SMILES OC(=NCCC2CN(C1=CC=C(O[Si](C)(C)C)C=C12)[Si](C)(C)C)C, Annotation [C18H30N2O2Si2]+, Rule of HR False"
+363.18671	4306
+