Mercurial > repos > prog > lcmsmatching
comparison lcmsmatching.xml @ 0:e66bb061af06 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author | prog |
---|---|
date | Tue, 12 Jul 2016 12:02:37 -0400 |
parents | |
children | 253d531a0193 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e66bb061af06 |
---|---|
1 <tool id="lcmsmatching" name="LC/MS matching" version="2.1.3"> | |
2 | |
3 <description>Matching of mz/rt values onto local reference compound database.</description> | |
4 | |
5 <requirements> | |
6 <requirement type="package" version="3.2.2">R</requirement> | |
7 <requirement type="package" version="1.20.0">r-getopt</requirement> | |
8 <requirement type="package" version="1.0.0">r-stringr</requirement> | |
9 <requirement type="package" version="1.8.3">r-plyr</requirement> | |
10 <requirement type="package" version="3.98">r-xml</requirement> | |
11 </requirements> | |
12 | |
13 <!--~~~~~~~ | |
14 ~ COMMAND ~ | |
15 ~~~~~~~~--> | |
16 | |
17 <command><![CDATA[ | |
18 $__tool_directory__/search-mz -i "$mzrtinput" | |
19 | |
20 ## Database: the reference database is a single file chosen by the user | |
21 -d file --url "$dbfile" | |
22 | |
23 ## M/Z matching: MS mode, precision and shift | |
24 -m $mzmode -p $mzprec -s $mzshift | |
25 | |
26 ## Precursor matching: the precursor lists are passed when precursor matching is on; the RTZ tolerance is added only when retention time matching is on as well | |
27 #if $prec.match == "true" | |
28 --precursor-match --pos-prec "$prec.pos" --neg-prec "$prec.neg" | |
29 #end if | |
30 #if $prec.match == "true" and $rt.match == "true" | |
31 --precursor-rt-tol $rt.tolz | |
32 #end if | |
33 | |
34 ## Chromatographic columns options and retention matching: either all columns, or the user supplied list. When the list is empty, neither branch applies and no column or RTX/RTY option is emitted. | |
35 #if $rt.match == "true" and $rt.cols.all == "true" | |
36 --all-cols --rttolx $rt.tolx --rttoly $rt.toly | |
37 #end if | |
38 #if $rt.match == "true" and $rt.cols.all == "false" and $rt.cols.list != "" | |
39 -c "$rt.cols.list" --check-cols --rttolx $rt.tolx --rttoly $rt.toly | |
40 #end if | |
41 | |
42 ## Table outputs: main table and matched peaks table | |
43 -o "$mainoutput" --peak-output-file "$peaksoutput" --same-rows --same-cols | |
44 | |
45 ## HTML output | |
46 --html-output-file "$htmloutput" --no-main-table-in-html-output | |
47 | |
48 ## Fields: column name and MS mode mappings for the input and database files | |
49 --input-col-names "$inputfields" | |
50 --db-fields "$dbfields" | |
51 --db-ms-modes "$dbmsmodes" | |
52 | |
53 ## Output settings: custom column names and separator when enabled, otherwise only the default separator is passed | |
54 #if $out.enabled == "true" | |
55 --output-col-names "$out.outputfields" | |
56 --molids-sep "$out.molidssep" | |
57 #else | |
58 --molids-sep "|" | |
59 #end if | |
60 | |
61 ]]></command> | |
62 | |
63 <!--~~~~~~ | |
64 ~ INPUTS ~ | |
65 ~~~~~~~--> | |
66 | |
67 <inputs> | |
68 | |
69 <!-- DATABASE --> | |
70 | |
71 <!-- Database file: tabular dataset used as the single file database --> | |
72 <param name="dbfile" label="Database file" type="data" format="tabular" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. Retention time values must be in seconds."/> | |
73 | |
74 <!-- File database field names: comma separated list of tag=name couples --> | |
75 <param name="dbfields" label="File database column names" type="text" size="256" value="mztheo=mztheo,colrt=colrt,molid=molid,col=col,mode=mode,attr=attr,comp=comp,molnames=molnames,molcomp=molcomp,molmass=molmass,inchi=inchi,inchikey=inchikey,pubchem=pubchem,chebi=chebi,hmdb=hmdb,kegg=kegg" help=""/> | |
76 | |
77 <!-- File database MS modes: comma separated list of mode=name couples --> | |
78 <param name="dbmsmodes" label="File database MS modes" type="text" size="32" value="pos=POS,neg=NEG" help=""/> | |
79 | |
80 <!-- INPUT --> | |
81 | |
82 <!-- Input file: tabular dataset holding the MZ(/RT) values to match --> | |
83 <param name="mzrtinput" label="Input file - MZ(/RT) values" type="data" format="tabular" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. RT values must be in seconds."/> | |
84 | |
85 <!-- Input field names: comma separated list of tag=name couples --> | |
86 <param name="inputfields" label="Input file column names" type="text" size="32" value="mz=mzmed,rt=rtmed" help=""/> | |
87 | |
88 <!-- M/Z MATCHING --> | |
89 <!-- <conditional name="mz"> | |
90 | |
91 <param name="enabled" label="M/Z matching" type="select"> | |
92 <option value="true">Show</option> | |
93 <option value="false">Hide</option> | |
94 </param> | |
95 | |
96 <when value="true">--> | |
97 <!-- MS mode of the input values, chosen with radio buttons --> | |
98 <param name="mzmode" label="MS mode" type="select" display="radio" multiple="false" help=""> | |
99 <option value="pos">Positive</option> | |
100 <option value="neg">Negative</option> | |
101 </param> | |
102 | |
103 <!-- MZ matching parameters: precision and shift, both in ppm --> | |
104 <param name="mzprec" label="M/Z precision (in ppm)" type="float" help="" value="5"/> | |
105 <param name="mzshift" label="M/Z shift (in ppm)" type="float" help="" value="0"/> | |
106 <!--</when> | |
107 <when value="false"></when> | |
108 </conditional>--> | |
109 | |
110 <!-- RETENTION TIME PARAMETERS --> | |
111 <conditional name="rt"> | |
112 | |
113 <param name="match" label="Retention time match" type="select"> | |
114 <option value="false">Off</option> | |
115 <option value="true">On</option> | |
116 </param> | |
117 | |
118 <when value="false"></when> | |
119 <when value="true"> | |
120 <!-- Columns: match against all chromatographic columns, or only against a user supplied list --> | |
121 <conditional name="cols"> | |
122 <param name="all" label="All chromatographic columns" type="select"> | |
123 <option value="true">Yes</option> | |
124 <option value="false">No</option> | |
125 </param> | |
126 <when value="true"></when> | |
127 <when value="false"> | |
128 <param name="list" label="Chromatographic columns" type="text" size="64" value="" help="Set here the list of chromatographic columns against which the retention time matching will be run. This is a comma separated list of the column names as used inside the database file."/> | |
129 </when> | |
130 </conditional> | |
131 | |
132 <!-- Tolerances: RTX and RTY are used for retention time matching, RTZ only when precursor matching is enabled too --> | |
133 <param name="tolx" label="RTX retention time tolerance, parameter x (in seconds)" type="float" help="" value="5"/> | |
134 <param name="toly" label="RTY retention time tolerance, parameter y" type="float" help="" value="0.8"/> | |
135 <param name="tolz" label="RTZ retention time tolerance, used when precursor matching is enabled." type="float" help="" value="5"/> | |
136 </when> | |
137 </conditional> | |
138 | |
139 <!-- PRECURSOR MATCH --> | |
140 <conditional name="prec"> | |
141 | |
142 <param name="match" label="Precursor match" type="select"> | |
143 <option value="false">Off</option> | |
144 <option value="true">On</option> | |
145 </param> | |
146 | |
147 <when value="false"></when> | |
148 <when value="true"> | |
149 <!-- Negative precursors: comma separated attributions treated as precursor peaks in negative mode. The sanitizer takes the double quote out of the valid character set and maps it to a backslash-escaped quote. --> | |
150 <param name="neg" label="List of negative precursors" type="text" size="128" value="[(M-H)]-,[M-H]-,[(M+Cl)]-,[M+Cl]-" help=""> | |
151 <sanitizer> | |
152 <valid initial="string.printable"> | |
153 <remove value='"'/> | |
154 </valid> | |
155 <mapping initial="none"> | |
156 <add source='"' target='\"'/> | |
157 </mapping> | |
158 </sanitizer> | |
159 </param> | |
160 | |
161 <!-- Positive precursors: comma separated attributions treated as precursor peaks in positive mode. Same sanitizer as for the negative list. --> | |
162 <param name="pos" label="List of positive precursors" type="text" size="128" value="[(M+H)]+,[M+H]+,[(M+Na)]+,[M+Na]+,[(M+K)]+,[M+K]+" help=""> | |
163 <sanitizer> | |
164 <valid initial="string.printable"> | |
165 <remove value='"'/> | |
166 </valid> | |
167 <mapping initial="none"> | |
168 <add source='"' target='\"'/> | |
169 </mapping> | |
170 </sanitizer> | |
171 </param> | |
172 </when> | |
173 </conditional> | |
174 | |
175 <!-- OUTPUT --> | |
176 <conditional name="out"> | |
177 | |
178 <param name="enabled" label="Output settings" type="select"> | |
179 <option value="false">Off</option> | |
180 <option value="true">On</option> | |
181 </param> | |
182 | |
183 <when value="false"></when> | |
184 <when value="true"> | |
185 | |
186 <!-- Output field names: comma separated list of tag=name couples used to rename the output columns --> | |
187 <param name="outputfields" label="Output column names" type="text" size="256" value="mz=mz,rt=rt,col=col,colrt=colrt,molid=molid,attr=attr,comp=comp,int=int,rel=rel,mzexp=mzexp,mztheo=mztheo,molnames=molnames,molcomp=molcomp,molmass=molmass,inchi=inchi,inchikey=inchikey,pubchem=pubchem,chebi=chebi,hmdb=hmdb,kegg=kegg" help=""/> | |
188 | |
189 <!-- Molecule IDs separator character. The sanitizer takes the double quote out of the valid character set and maps it to a backslash-escaped quote. --> | |
190 <param name="molidssep" label="Molecule IDs separator character" type="text" size="3" value="|" help=""> | |
191 <sanitizer> | |
192 <valid initial="string.printable"> | |
193 <remove value='"'/> | |
194 </valid> | |
195 <mapping initial="none"> | |
196 <add source='"' target='\"'/> | |
197 </mapping> | |
198 </sanitizer> | |
199 </param> | |
200 </when> | |
201 </conditional> | |
202 | |
203 </inputs> | |
204 | |
205 <!--~~~~~~~ | |
206 ~ OUTPUTS ~ | |
207 ~~~~~~~~--> | |
208 | |
209 <outputs> | |
210 | |
211 <!-- Output files: main table, matched peaks table and HTML page, each labelled after the name of the input dataset --> | |
212 <data name="mainoutput" label="lcmsmatch_${mzrtinput.name}" format="tabular"/> | |
213 <data name="peaksoutput" label="lcmsmatch_${mzrtinput.name}_peaks" format="tabular"/> | |
214 <data name="htmloutput" label="lcmsmatch_${mzrtinput.name}.html" format="html"/> | |
215 | |
216 </outputs> | |
217 | |
218 <!--~~~~~ | |
219 ~ TESTS ~ | |
220 ~~~~~~--> | |
221 | |
222 <tests> | |
223 | |
224 <!-- Simple quick test: positive mode, field mapping parameters set to empty strings, retention time and precursor options left at their defaults; the three outputs are compared with reference files --> | |
225 <test> | |
226 <param name="dbfile" value="filedb.tsv"/> | |
227 <param name="mzrtinput" value="mz-input-small.tsv"/> | |
228 <param name="inputfields" value=""/> | |
229 <param name="dbfields" value=""/> | |
230 <param name="dbmsmodes" value=""/> | |
231 <param name="mzmode" value="pos"/> | |
232 <output name="mainoutput" file="filedb-small-mz-match-output.tsv"/> | |
233 <output name="peaksoutput" file="filedb-small-mz-match-peaks-output.tsv"/> | |
234 <output name="htmloutput" file="filedb-small-mz-match-html-output.html"/> | |
235 </test> | |
236 </tests> | |
237 | |
238 <!--~~~~ | |
239 ~ HELP ~ | |
240 ~~~~~--> | |
241 | |
242 <help> | |
243 <!-- @@@BEGIN_RST@@@ --> | |
244 | |
245 ============== | |
246 LC/MS matching | |
247 ============== | |
248 | |
249 This tool performs LC/MS matching on an input list of MZ/RT values, using a provided single file database. | |
250 | |
251 ----------- | |
252 Input files | |
253 ----------- | |
254 | |
255 Be careful to always provide UTF-8 encoded files, unless you do not use special characters at all. For instance, greek letters in molecule names give errors if the file is in latin1 (ISO 8859-1) or Windows 1252 (not distinguishable from latin1) encoding. | |
256 | |
257 Single file database | |
258 ==================== | |
259 | |
260 The database used is provided as a single file, in tabular format, through the *Database file* field. This file contains a list of MS peaks, with retention times. | |
261 Peaks are "duplicated" as many times as necessary. For instance, if 3 retention times are available for a compound with 10 peaks in positive mode, then there will be 30 lines for this compound in positive mode. | |
262 | |
263 The file must contain a header with the column names. The names are free, but must be provided through the *File database column names* field. | |
264 In this field, each column is identified with a tag, and the columns names are listed as a comma separated list of tag/name couples (separated by character `=`). The allowed tags are the following ones: | |
265 | |
266 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
267 | Column tag | Compulsory | Values | | |
268 +==============+============+============================================================================================================+ | |
269 | mztheo | Yes | The m/z values. | | |
270 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
271 | mode | Yes | The MS mode. | | |
272 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
273 | molid | Yes | This is the identifier of your compound. | | |
274 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
275 | colrt | No | The retention time values in seconds. | | |
276 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
277 | col | No | The chromatographic column associated with the retention time. Compulsory if retention times are provided. | | |
278 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
279 | attr | No | The attribution of the peak (e.g.: ``[(M+H)-(H2O)-(NH3)]+``). | | |
280 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
281 | comp | No | The composition of the peak (e.g.: ``C6 H10 N O``). | | |
282 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
283 | molcomp | No | The composition of the molecule. (e.g.: ``C6H14N2O2``). | | |
284 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
285 | molmass | No | The mass of the molecule. | | |
286 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
287 | molnames | No | The names of the molecule, as a semicolon separated list. | | |
288 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
289 | inchi | No | The InChI of the molecule. | | |
290 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
291 | inchikey | No | The InChI key of the molecule. | | |
292 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
293 | pubchem | No | The PubChem ID of the molecule. | | |
294 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
295 | chebi | No | The ChEBI ID of the molecule. | | |
296 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
297 | hmdb | No | The HMDB ID of the molecule. | | |
298 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
299 | kegg | No | The KEGG ID of the molecule. | | |
300 +--------------+------------+------------------------------------------------------------------------------------------------------------+ | |
301 | |
302 The field *File database MS modes* allows you to personalize the MS mode identifiers. The value of the field is a comma separated list of mode/name couples (separated by character `=`). | |
303 For instance, if in your database file you use characters '+' and '-' to identify the modes, then you must set the field to `pos=+,neg=-`. | |
304 | |
305 Example of database file (totally fake, no meaning): | |
306 | |
307 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
308 | molid | mode | mz | composition | attribution | col | rt | molcomp | molmass | molnames | | |
309 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
310 | A10 | "POS" | 112.07569 | "P9Z6W410 O" | "[(M+H)-(H2O)-(NH3)]+" | "colzz" | 5.69 | "J114L6M62O2" | 146.10553 | "Blablaine'" | | |
311 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
312 | A10 | "POS" | 112.07569 | "P9Z6W410 O" | "[(M+H)-(H2O)-(NH3)]+" | "col12" | 0.8 | "J114L6M62O2" | 146.10553 | "Blablaine" | | |
313 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
314 | A10 | "POS" | 112.07569 | "P9Z6W410 O" | "[(M+H)-(H2O)-(NH3)]+" | "somecol" | 8.97 | "J114L6M62O2" | 146.10553 | "Blablaine" | | |
315 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
316 | A10 | "POS" | 191.076694 | "P92Z6W413 Na2 O2" | "[(M-H+2Na)]+" | "colAA" | 1.58 | "J114L6M62O2" | 146.10553 | "Blablaine" | | |
317 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
318 | A10 | "POS" | 191.076694 | "P92Z6W413 Na2 O2" | "[(M-H+2Na)]+" | "colzz2" | 4.08 | "J114L6M62O2" | 146.10553 | "Blablaine" | | |
319 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
320 | A10 | "POS" | 294.221687 | "U1113P94ZW429 O4" | "[(2M+H)]+ (13C)" | "somecol" | 8.97 | "J114L6M62O2" | 146.10553 | "Blablaine" | | |
321 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
322 | A10 | "POS" | 72.080775 | "P9Z4W410 O0" | "[(M+H)-(J15L2M6O2)]+" | "hcoltt" | 0.8 | "J114L6M62O2" | 146.10553 | "Blablaine" | | |
323 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
324 | A10 | "POS" | 112.07569 | "P9Z6W410 O" | "[(M+H)-(H2O)-(NH3)]+" | "colzz3" | 4.54 | "J114L6M62O2" | 146.10553 | "Blablaine" | | |
325 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
326 | A10 | "POS" | 72.080775 | "P9Z4W410 O0" | "[(M+H)-(J15L2M6O2)]+" | "colzz3" | 4.54 | "J114L6M62O2" | 146.10553 | "Blablaine" | | |
327 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
328 | A10 | "POS" | 72.080775 | "P9Z4W410 O0" | "[(M+H)-(J15L2M6O2)]+" | "colpp" | 0.89 | "J114L6M62O2" | 146.10553 | "Blablaine" | | |
329 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
330 | A10 | "POS" | 145.097154 | "P92Z6W413 O2" | "[(M+H)-(H2)]+" | "hcoltt" | 0.8 | "J114L6M62O2" | 146.10553 | "Blablaine" | | |
331 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | |
332 | |
333 MZ/RT input file | |
334 ================ | |
335 | |
336 The input to provide is a file, in a tabular format (or TSV: Tab Separated Values), containing the list of MZ/RT values. | |
337 | |
338 The following columns will be used: | |
339 | |
340 +--------------+------------+---------------------------------------+ | |
341 | Column tag | Compulsory | Values | | |
342 +==============+============+=======================================+ | |
343 | mz | Yes | The m/z values. | | |
344 +--------------+------------+---------------------------------------+ | |
345 | rt | No | The retention time values in seconds. | | |
346 +--------------+------------+---------------------------------------+ | |
347 | |
348 The file may contain a header line, in which case you have to provide the column names through the *Input file column names* field, which consists of a comma separated list of tag/name couples (separated by character `=`). If your file does not contain a header line, then you must provide the column numbers. Examples: | |
349 | |
350 * With a header line having name MASS for mz column and RET for rt column: `mz=MASS,rt=RET`. | |
351 * With no header line: `mz=1,rt=2`. | |
352 | |
353 Since the MS spectrum mode can not be known from the file, an *MS mode* radio button field is provided for setting the mode. | |
354 | |
355 Example of file input: | |
356 | |
357 +-------------+-------------+ | |
358 | mz | rt | | |
359 +-------------+-------------+ | |
360 | 75.02080998 | 49.38210915 | | |
361 +-------------+-------------+ | |
362 | 75.05547146 | 0.658528069 | | |
363 +-------------+-------------+ | |
364 | 75.08059797 | 1743.94267 | | |
365 +-------------+-------------+ | |
366 | 76.03942694 | 51.23158899 | | |
367 +-------------+-------------+ | |
368 | 76.07584477 | 50.51249853 | | |
369 +-------------+-------------+ | |
370 | 76.07593168 | 0.149308136 | | |
371 +-------------+-------------+ | |
372 | |
373 ------------ | |
374 M/Z matching | |
375 ------------ | |
376 | |
377 In the simplest form of the algorithm only the *m/z* values are matched against the database peaks. This happens if both *Retention time match* and *Precursor match* are off. | |
378 | |
379 The first parameter is the MS mode, specified through the *MS mode* parameter. | |
380 | |
381 The parameters *M/Z precision* and *M/Z shift* are used by the algorithm in the following formula in order to match an *m/z* value: | |
382 | |
383 mz (1 + (- shift - precision) / 10^6) < mztheo < mz (1 + (- shift + precision) / 10^6) | |
384 | |
385 Where *mztheo* is the theoretical mass of the database peak that is tested. If this double inequality is true, then the *m/z* value is matched with this peak. | |
386 | |
387 -------------------- | |
388 Retention time match | |
389 -------------------- | |
390 | |
391 If at least one column is checked inside the *Columns* parameter section, then retention time is also matched, in addition to the *m/z* value, according to the following formula: | |
392 | |
393 rt - x - rt^y < colrt < rt + x + rt^y | |
394 | |
395 Where *x* is the value of the parameter *RTX* and *y* the value of the parameter *RTY*. | |
396 | |
397 If for a reference compound the database does not contain retention time for at least one of the specified columns, then only the *m/z* value is matched against the peaks of the reference compound. This means that in the results you can find compounds that do not match the provided retention time value. | |
398 | |
399 The *RTZ* parameter is used in the *Precursor match* algorithm (see below). | |
400 | |
401 --------------- | |
402 Precursor match | |
403 --------------- | |
404 | |
405 If the "Precursor match" option is enabled inside the parameters section, then a more sophisticated version of the algorithm, which is executed in two steps, is used. | |
406 | |
407 This algorithm takes two more parameters, one for each MS mode. These are the lists of precursors. Since the matching is run for one MS mode only, only one of the two parameters is used. Inside the single file database, all the peaks whose **attr** column value is equal to one of the precursors listed in *List of negative precursors* or *List of positive precursors*, depending on the mode, are considered as precursor peaks. | |
408 | |
409 M/Z matching using precursor matching | |
410 ===================================== | |
411 | |
412 1. Using the normal M/Z matching algorithm described above, we first look only for precursor peaks ([(M+H)]+, [(M+Na)]+, [(M+Cl)]-, ...). | |
413 2. From step 1, we construct a list of matched molecules. | |
414 3. We look at all peaks inside the molecule list obtained in step 2, using the normal M/Z matching algorithm described above. | |
415 | |
416 MZ/RT matching using precursor matching | |
417 ======================================= | |
418 | |
419 1. Using the normal MZ/RT matching algorithm described above, we first look only for precursor peaks ([(M+H)]+, [(M+Na)]+, [(M+Cl)]-, ...). | |
420 2. From step 1, we construct a list of matched molecules, retaining the matched retention time of each molecule. | |
421 3. For each input couple (m/z,rt), we look at all peaks inside the molecules taken from step 2, whose matched retention time is between *rt - z* and *rt + z*, where *z* is the value of parameter *RTZ*. | |
422 | |
423 --------------- | |
424 Output settings | |
425 --------------- | |
426 | |
427 The *Output column names* parameter is used to customize the columns of the output files. As with the *File database column names* parameter, each column is identified with a tag, and the columns names are listed as a comma separated list of tag/name couples (separated by character `=`). The allowed tags are the following ones: | |
428 | |
429 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
430 | Column tag | Values | | |
431 +==============+=================================================================================================================================+ | |
432 | mz | The m/z values from the input file. | | |
433 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
434 | mztheo | The m/z values from the database. | | |
435 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
436 | molid | This is the identifier of your compound. | | |
437 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
438 | rt | The retention time values in seconds from the input file. | | |
439 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
440 | col | The chromatographic column associated with the retention time. | | |
441 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
442 | colrt | The retention time associated with the matched chromatographic column. | | |
443 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
444 | msmatching | The list IDs of matched molecules. IDs are separated by the character specified in the *Molecule IDs separator character* field | | |
445 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
446 | attr | The attribution of the peak (e.g.: ``[(M+H)-(H2O)-(NH3)]+``). | | |
447 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
448 | comp | The composition of the peak (e.g.: ``C6 H10 N O``). | | |
449 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
450 | molcomp | The composition of the molecule. (e.g.: ``C6H14N2O2``). | | |
451 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
452 | molmass | The mass of the molecule. | | |
453 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
454 | molnames | The names of the molecule, as a semicolon separated list. | | |
455 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
456 | inchi | The InChI of the molecule. | | |
457 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
458 | inchikey | The InChI key of the molecule. | | |
459 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
460 | pubchem | The PubChem ID of the molecule. | | |
461 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
462 | chebi | The ChEBI ID of the molecule. | | |
463 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
464 | hmdb | The HMDB ID of the molecule. | | |
465 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
466 | kegg | The KEGG ID of the molecule. | | |
467 +--------------+---------------------------------------------------------------------------------------------------------------------------------+ | |
468 | |
469 The *Molecule IDs separator character* is used to customize the character used to separate the molecule IDs of the **molid** column inside the *main* output file. | |
470 | |
471 Output files | |
472 ============ | |
473 | |
474 Three files are output by the tool. | |
475 | |
476 +-------------+--------------------------------------+--------------------------------------------------------+ | |
477 | Outputs | File name | Description | | |
478 +-------------+--------------------------------------+--------------------------------------------------------+ | |
479 | Main output | lcmsmatch_{input_file_name} | Contains the list of compounds that have been matched. | | |
480 +-------------+--------------------------------------+--------------------------------------------------------+ | |
481 | Peak list | lcmsmatch_{input_file_name}_peaks | Contains all matched database peaks. | | |
482 +-------------+--------------------------------------+--------------------------------------------------------+ | |
483 | HTML output | lcmsmatch_{input_file_name}.html | Contains the two tables on one page. | | |
484 +-------------+--------------------------------------+--------------------------------------------------------+ | |
485 | |
486 The **main** output is identical to the input file, to which is added an *msmatching* column. This column contains a list of IDs of the compounds that have been matched for this couple of (m/z, rt) values. | |
487 | |
488 The **peak list** output contains all database peaks that have been matched, for each (m/z, rt) input couple. Thus for each (m/z, rt) couple, there will be zero, one or more matched peaks output. The columns output are *mz*, *rt*, *id*, *mztheo*, *col*, *colrt*, *attribution* and *composition*, where *id* is the compound ID, *mztheo* is the theoretical mass of the fragment, *col* is the matched column and *colrt* is the retention time measured on the column for the reference compound. | |
489 | |
490 The **HTML** output contains the peak table with links toward HMDB, KEGG, ChEBI and PubChem public databases, when IDs are available. | |
491 | |
492 ===== | |
493 About | |
494 ===== | |
495 | |
496 .. class:: infomark | |
497 | |
498 **Author** | |
499 Pierrick Roger (pierrick.roger@cea.fr) wrote this MS matching method. | |
500 MetaboHUB: The French National Infrastructure for Metabolomics and Fluxomics (http://www.metabohub.fr/en). | |
501 | |
502 .. class:: infomark | |
503 | |
504 **Acknowledgement** | |
505 Data and algorithms have been kindly provided by Christophe Junot at *DSV/IBITEC-S/SPI* (*CEA/Saclay*), from a former application developed by Cyrille Petat and Arnaud Martel at *DSV/IBITEC-S/DIR* (*CEA/Saclay*). | |
506 | |
507 .. class:: infomark | |
508 | |
509 **Please cite** | |
510 R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org | |
511 | |
512 <!-- @@@END_RST@@@ --> | |
513 </help> | |
514 | |
515 <!--~~~~~~~~~ | |
516 ~ CITATIONS ~ | |
517 ~~~~~~~~~~--> | |
518 | |
519 <citations/> | |
520 | |
521 </tool> |