comparison golm_ws_lib_search.xml @ 0:e3d43b8c987b draft

Init repository with last tool-bank-golm-lib_search master version
author fgiacomoni
date Mon, 05 Dec 2016 08:32:04 -0500
parents
children 11779b6402bc
comparison
equal deleted inserted replaced
-1:000000000000 0:e3d43b8c987b
1 <tool id="golm_ws_lib_search" name="Golm Metabolome Database search spectrum" version="2016-11-28">
2 <description>
3 : GC-MS Mass Spectral Database.
4 </description> <!-- Short tagline for the tool; presumably rendered right after the tool name, hence the leading ':' (confirm against the Galaxy tool panel). -->
5
6 <!--<requirements>
7 <requirement type="package" version="0.413">perl-list-moreutils</requirement>
8 <requirement type="package" version="1.19">perl-soap-lite</requirement>
9 <requirement type="package" version="0.95">perl-excel-writer-xlsx</requirement>
10 <requirement type="package" version="2.90">perl-json</requirement>
11 <requirement type="package" version="2.95">perl-html-template</requirement>
12 </requirements>-->
13
14 <stdio> <!-- Error detection: every non-zero exit code fails the job. Perl's die usually exits with errno or 255 rather than 1, so matching only "1" could let a crashed run pass as successful. -->
15 <exit_code range="1:" level="fatal" />
16 </stdio>
17
18 <!-- Cheetah template for the Perl call: the input_type conditional selects file mode (-inputFile) or manual mode (-inputMasses) together with the matching output triplet; every output variable must match a <data name="..."> declared in <outputs>. --> <command><![CDATA[
19 perl '$__tool_directory__/golm_ws_lib_search.pl'
20 #if str($input_type.choice) == "YES":
21 -inputFile "${input_type.masses}"
22 -output_tabular "$GolmOutTab" -output_html "$GolmOutWeb" -output_xls "$GolmOutXlsx"
23 #else:
24 -inputMasses "${input_type.mass}"
25 -output_tabular "$GolmOutTab2" -output_html "$GolmOutWeb2" -output_xls "$GolmOutXlsx2"
26 #end if
27 -ri "$ri"
28 -riWindow "$riWindow"
29 -gcColumn "$columnType"
30 -mzRes "$mzRes"
31 -maxHits "$maxHits"
32 -maxIons "$maxIons"
33 -JaccardDistanceThreshold "$JaccardDistanceThreshold"
34 -s12GowerLegendreDistanceThreshold "$s12GowerLegendreDistanceThreshold"
35 -DotproductDistanceThreshold "$DotproductDistanceThreshold"
36 -HammingDistanceThreshold "$HammingDistanceThreshold"
37 -EuclideanDistanceThreshold "$EuclideanDistanceThreshold"
38 -relative "$intensity_type"
39
40 ]]></command>
41 <inputs> <!-- Form parameters; each name below is referenced as a $variable in the command template. -->
42 <conditional name="input_type">
43 <param name="choice" type="select" label="Would you use a file" help="If 'NO' is selected then one or more mass(es) must be entered manually.">
44 <option value="YES">YES</option>
45 <option value="NO">NO</option>
46 </param>
47 <when value="YES">
48 <param name="masses" type="data" format="msp,txt" label="File of masses (format: msp)" help=".msp output file from metaMS.runGC function, or any msp file." />
49 </when>
50 <when value="NO">
51 <param name="mass" type="text" area="true" size="30" label="Masses and intensities (entered manually)" help="For a list of masses + intensities write : mz1 int1 mz2 int2 mzx intx..."/>
52 </when>
53 </conditional>
54 <param name="columnType" type="select" display="radio" label="Column" help="5%-phenyl-95%-dimethylpolysiloxane (VAR5), 35%-phenyl-65%-dimethylpolysiloxane (MDN35).">
55 <option value="VAR5">VAR5</option>
56 <option value="MDN35">MDN35</option>
57 </param>
58 <param name="ri" type="integer" value="1898" label="Alkane Retention Index" help="If neither an alkane RIs for VAR5 nor MDN35 is available in your setup, please select 'none' in the input field above!" /> <!-- NOTE(review): the help mentions a 'none' choice, but the Column select above only offers VAR5 and MDN35. Confirm intended behaviour. -->
59 <param name="riWindow" type="integer" value="5" label="Retention Index Window" help="This value is for the library search used only. A larger window size will increase the number of matches.
60 At the same time the identification becomes less reliable due to false matching spectra without RI consensus." />
61 <param name="maxHits" type="integer" value="100" label="Maximum Hits" help="Maximum number of hits per queried spectra (0 = all)." />
62 <param name="mzRes" type="integer" value="0" label="Number of significant decimal" help="Number of significant decimals of your m/z." />
63 <param name="maxIons" type="integer" value="0" label="Maximum number of ions" help="Number of m/z per spectra you want to keep for the queries, default 0 = all of them." />
64 <param name="JaccardDistanceThreshold" type="float" value="0.9" label="JaccardDistanceThreshold" help="Number of matches, divided by the sum of matches and mismatches." />
65 <param name="s12GowerLegendreDistanceThreshold" type="float" value="0.9" label="s12GowerLegendreDistanceThreshold" help="The distance measure S12GowLeg = sqrt(1 - s12) is derived from the S12 coefficient of Gower &amp; Legendre defined as s12 = a / sqrt((a + b)(a + c))." />
66 <param name="DotproductDistanceThreshold" type="float" value="0.5" label="DotproductDistanceThreshold" help="The Dotproduct distance is summing the multiplied intensities over all matching peaks within both spectra. Here, to satisfy the conditions of a metric I) non-negativity, II) identity of indiscernibles, III) symmetry and IV) subadditivity / triangle inequality, we use 1-Dotproduct. Both spectra are normalised prior to the spectral vector norm in that way, that the absolute value of the squared intensities is equal to 1." />
67 <param name="HammingDistanceThreshold" type="float" value="500" label="HammingDistanceThreshold" help="In information theory, the Hamming distance between two strings of equal length is the number of positions for which the corresponding symbols are different. Put another way, it measures the minimum number of substitutions required to change one into the other, or the number of errors that transformed one string into the other." />
68 <param name="EuclideanDistanceThreshold" type="float" value="0.5" label="EuclideanDistanceThreshold" help="The Euclid is the square root of the sum of the squared differences over all matching peaks." />
69 <param name="intensity_type" type="select" display="radio" label="Type of intensities" help="Would you work with relative or absolute intensities? Example: relative = percentage, absolute = untouched. Relative is preferred"> <!-- Passed to the script as -relative: the YES label carries the value "true", NO carries "false". -->
70 <option value="true">YES</option>
71 <option value="false">NO</option>
72 </param>
73 </inputs>
74
75 <outputs> <!-- Two mutually exclusive output triplets chosen by input_type.choice: file mode (YES) names results after the input dataset, manual mode (NO) uses fixed GOLM labels. -->
76 <data name="GolmOutTab" label="${input_type.masses.name[:-6]}.GOLM.tabular" format="tabular"> <!-- NOTE(review): [:-6] drops the last six characters of the input dataset name. Confirm this matches the expected file suffix. -->
77 <filter>input_type['choice'] == "YES"</filter>
78 </data>
79 <data name="GolmOutWeb" label="${input_type.masses.name[:-6]}.GOLM_WEB.html" format="html">
80 <filter>input_type['choice'] == "YES"</filter>
81 </data>
82 <data name="GolmOutXlsx" label="${input_type.masses.name[:-6]}.GOLM.txt" format="tabular">
83 <filter>input_type['choice'] == "YES"</filter>
84 </data>
85 <data name="GolmOutTab2" label="GOLM.tabular" format="tabular">
86 <filter>input_type['choice'] == "NO"</filter>
87 </data>
88 <data name="GolmOutWeb2" label="GOLM_WEB.html" format="html">
89 <filter>input_type['choice'] == "NO"</filter>
90 </data>
91 <data name="GolmOutXlsx2" label="GOLM.txt" format="tabular">
92 <filter>input_type['choice'] == "NO"</filter>
93 </data>
94 </outputs>
95
96 <tests>
97 <test>
98 <!-- test 1: file input (msp); few results - too restrictive thresholds -->
99 <param name="choice" value="YES"/>
100 <param name="masses" value="input01_peakspectra_test.msp"/>
101 <param name="columnType" value="VAR5"/>
102 <param name="ri" value="1898"/>
103 <param name="riWindow" value="5"/>
104 <param name="maxHits" value="10"/>
105 <param name="mzRes" value="0"/>
106 <param name="maxIons" value="0"/>
107 <param name="JaccardDistanceThreshold" value="0.9"/>
108 <param name="s12GowerLegendreDistanceThreshold" value="0.9"/>
109 <param name="DotproductDistanceThreshold" value="0.5"/>
110 <param name="EuclideanDistanceThreshold" value="0.5"/>
111 <param name="HammingDistanceThreshold" value="500"/>
112 <param name="intensity_type" value="true"/> <!-- declared option value behind the "YES" label, used instead of the display text -->
113 <output name="GolmOutXlsx" file="output01.txt"/>
114 <output name="GolmOutWeb" file="output01.html"/>
115 <output name="GolmOutTab" file="output01.tabular"/>
116 </test>
117 <test>
118 <!-- test 2: manual input (masses and intensities typed in); results - default thresholds -->
119 <param name="choice" value="NO"/>
120 <param name="mass" value="70 3 71 3 72 16 73 999 74 87 75 78 76 4 77 5 81 1 82 6 83 13 84 4 85 3 86 4 87 5 88 4 89 52 90 4 91 2 97 2 98 1 99 4 100 12 101 16 102 9 103 116 104 11 105 26 106 2 107 1 111 1 112 1 113 4 114 11 115 7 116 5 117 93 118 9 119 8 126 1 127 3 128 3 129 101 130 19 131 25 132 4 133 60 134 8 135 4 140 1 141 1 142 4 143 13 144 2 145 6 146 1 147 276 148 44 149 27 150 3 151 1 156 1 157 70 158 12 159 5 160 148 161 26 162 7 163 8 164 1 168 1 169 2 170 1 172 3 173 4 174 1 175 4 177 4 186 2 187 1 189 28 190 7 191 13 192 2 193 1 201 5 202 1 203 3 204 23 205 162 206 31 207 16 208 2 210 2 214 1 215 2 216 8 217 88 218 18 219 8 220 1 221 6 222 1 229 23 230 6 231 11 232 3 233 4 234 3 235 1 243 1 244 2 245 1 246 2 247 1 256 1 262 3 263 1 269 2 270 1 274 4 275 1 277 4 278 1 291 7 292 2 293 1 300 1 305 4 306 1 307 4 308 1 318 1 319 122 320 37 321 17 322 3 323 1 343 1 364 2 365 1"/>
121 <param name="columnType" value="VAR5"/>
122 <param name="ri" value="1898"/>
123 <param name="riWindow" value="5"/>
124 <param name="maxHits" value="10"/>
125 <param name="mzRes" value="0"/>
126 <param name="maxIons" value="0"/>
127 <param name="JaccardDistanceThreshold" value="0.9"/>
128 <param name="s12GowerLegendreDistanceThreshold" value="0.9"/>
129 <param name="DotproductDistanceThreshold" value="0.5"/>
130 <param name="EuclideanDistanceThreshold" value="0.5"/>
131 <param name="HammingDistanceThreshold" value="500"/>
132 <param name="intensity_type" value="true"/> <!-- declared option value behind the "YES" label, used instead of the display text -->
133 <output name="GolmOutXlsx2" file="output02.txt"/>
134 <output name="GolmOutWeb2" file="output02.html"/>
135 <output name="GolmOutTab2" file="output02.tabular"/>
136 </test>
137 <test>
138 <!-- test 3: file input (full msp); lot of results - restrictive thresholds -->
139 <param name="choice" value="YES"/>
140 <param name="masses" value="input03_peakspectra_full.msp"/>
141 <param name="columnType" value="VAR5"/>
142 <param name="ri" value="1898"/>
143 <param name="riWindow" value="5"/>
144 <param name="maxHits" value="10"/>
145 <param name="mzRes" value="0"/>
146 <param name="maxIons" value="0"/>
147 <param name="JaccardDistanceThreshold" value="0.9"/>
148 <param name="s12GowerLegendreDistanceThreshold" value="0.9"/>
149 <param name="DotproductDistanceThreshold" value="0.5"/>
150 <param name="EuclideanDistanceThreshold" value="0.5"/>
151 <param name="HammingDistanceThreshold" value="500"/>
152 <param name="intensity_type" value="true"/> <!-- declared option value behind the "YES" label, used instead of the display text -->
153 <output name="GolmOutXlsx" file="output03.txt"/>
154 <output name="GolmOutWeb" file="output03.html"/>
155 <output name="GolmOutTab" file="output03.tabular"/>
156 </test>
157 </tests>
158
159 <help><![CDATA[
160
161 .. class:: infomark
162
163 **Authors**
164
165 | Gabriel Cretin (for perl and Galaxy), Yann Guitton (for R version and tests) and Franck Giacomoni (for perl and Galaxy)
166
167 ---------------------------------------------------
168
169 .. class:: infomark
170
171 **If you use this tool, please cite the Golm Metabolome Database**
172
173 for `Golm Metabolome Database <http://gmd.mpimp-golm.mpg.de/>`_ :
174 `Hummel, J., Strehmel, N., Selbig, J., Walther, D. and Kopka, J. (2010) Decision tree supported substructure prediction of metabolites from GC-MS profiles, Metabolomics. <http://dx.doi.org/10.1007/s11306-010-0198-7>`_
175
176
177 -----------
178 Description
179 -----------
180
181 The Golm Metabolome Database (GMD) facilitates the search for and dissemination of reference mass spectra from biologically active metabolites quantified using gas chromatography (GC) coupled to mass spectrometry (MS).
182 This tool intends to facilitate the annotation of masses from GC-MS by searching for information through the GMD web services.
183
184 -----------
185 Input files
186 -----------
187
188 | **Parameter**: inputSpectra
189 | **Format** : msp
190
191
192 A file containing spectra in the msp format.
193 Example of a spectrum in msp format:
194
195 | Name: Unknown1
196 | DB.idx: -1
197 | rt: 10.58
198 | Class: Unknown
199 | rt.sd: 0.003
200 | Num Peaks: 19
201 | 73.0465 826983.38; 74.0481 70018.08; 75.0319 69475.73; 100.0573 37477.24; 103.0227 43054.28;
202 | 116.0884 1433179.62; 117.0905 151975.23; 118.0869 53105.64; 128.0526 26404.77; 131.0359 22647.44;
203 | 133.0438 22141.56; 147.0666 255488.28; 148.066 49965.66; 149.0551 37762.38; 190.1069 72568.23;
204 | 191.1063 18017.34; 192.1023 6460.8; 207.0333 35435.81; 218.1028 30528.82;
205
206 ----------
207 Parameters
208 ----------
209
210 **Would you use a file**
211
212 | Choose whether the masses are in a file or entered manually
213 | YES (default) : parameter **File of masses (format: msp)** is visible
214 | NO : parameter **Masses and intensities (entered manually)** is visible
215 | For both, all other parameters are available
216
217
218 **Column type**
219
220 | VAR5 means a 5%-phenyl-95%-dimethylpolysiloxane column and MDN35 means a 35%-phenyl-65%-dimethylpolysiloxane column. If you don't know select 'None'.
221
222
223 **Alkane Retention Index**
224
225 | If neither an alkane RIs for VAR5 nor MDN35 is available in your setup, please select 'none' in the input field above!
226
227
228 **Retention Index Window**
229
230 | This value is for the library search used only. A larger window size will increase the number of matches.
231 | At the same time the identification becomes less reliable due to false matching spectra without RI consensus.
232 | The maximal number of hits returned from the data base is limited due to performance reasons.
233
234
235 **Maximum Hits**
236
237 | Maximum number of hits returned by the Golm database per queried spectrum (form default = 100); 0 means all of them are taken into account.
238
239
240 **Number of significant decimal**
241
242 | Number of significant decimals of your m/z.
243 | Example: m/z = 73.798 if mzRes = 4, m/z becomes 73.7980
244 | m/z = 73.798 if mzRes = 0, m/z becomes 74
245
246
247 **Maximum number of ions**
248
249 | Number of m/z and intensities per spectra you want to keep for the queries to Golm, default = 0 = all of them.
250
251
252 **JaccardDistanceThreshold**
253
254 | Number of matches (a mass with appropriate intensity in both spectra) divided by the sum of matches and mismatches (a mass where only one of both spectra has an intensity).
255 | The Jaccard distance is a binary distance.
256
257
258 **s12GowerLegendreDistanceThreshold**
259
260 | The distance measure S12GowLeg = sqrt(1 - s12) is derived from the S12 coefficient of Gower & Legendre defined as s12 = a / sqrt((a + b)(a + c)), with "a" representing the number of positions at which both spectra are in "on-state" and "b" respectively "c" representing the number of positions at which only the query spectrum or the hit spectrum are in "on-state".
261
262
263 **DotproductDistanceThreshold**
264
265 | The Dotproduct distance is summing the multiplied intensities over all matching peaks within both spectra. Here, to satisfy the conditions of a metric I) non-negativity, II) identity of indiscernibles, III) symmetry and IV) subadditivity / triangle inequality, we use 1-Dotproduct. Both spectra are normalised prior to the spectral vector norm in that way, that the absolute value of the squared intensities is equal to 1.
266
267
268 **HammingDistanceThreshold**
269
270 | In information theory, the Hamming distance between two strings of equal length is the number of positions for which the corresponding symbols are different. Put another way, it measures the minimum number of substitutions required to change one into the other, or the number of errors that transformed one string into the other.
271
272
273 **EuclideanDistanceThreshold**
274
275 | The Euclid is the square root of the sum of the squared differences over all matching peaks.
276
277
278 **Type of intensities**
279
280 | Use absolute or relative intensities.
281 | Example: relative = percentage (intensity * 100) / max(intensities), absolute = untouched
282
283
284 ------------
285 Output files
286 ------------
287
288 **Tree types of files**
289
290 | GOLM.html : to view results on a webpage (HTML).
291 | GOLM.xlsx : to get results in an Excel-like format.
292 | GOLM.tabular : to get results in tabular format.
293
294 ---------------------------------------------------
295
296
297 ---------------
298 Working example
299 ---------------
300
301
302 .. class:: warningmark
303
304 Refer to the corresponding W4M HowTo section: http://workflow4metabolomics.org/howto
305 | Format Data For Postprocessing
306 | Perform GCMS Annotations
307
308
309 ]]></help>
310
311 <citations> <!-- NOTE(review): the help text also asks users to cite Hummel et al. (2010), doi:10.1007/s11306-010-0198-7, which is not listed here. Consider adding it. -->
312 <citation type="doi">10.1093/bioinformatics/btu813</citation>
313 </citations>
314
315 </tool>
316