comparison lcmsmatching.xml @ 5:fb9c0409d85c draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author prog
date Wed, 19 Apr 2017 10:00:05 -0400
parents b34c14151f25
children f86fec07f392
comparison
equal deleted inserted replaced
4:b34c14151f25 5:fb9c0409d85c
1 <tool id="lcmsmatching" name="LC/MS matching" version="3.2.0" profile="16.01"> 1 <tool id="lcmsmatching" name="LC/MS matching" version="3.3.1" profile="16.01">
2 2
3 <description>Annotation of MS peaks using matching on a spectra database.</description> 3 <description>Annotation of MS peaks using matching on a spectra database.</description>
4 4
5 <requirements> 5 <requirements>
6 <!--<requirement type="package" version="3.3.3">r</requirement>-->
7 <requirement type="package" version="7.0">readline</requirement> <!-- Try readline 7.0 -->
6 <requirement type="package" version="1.20.0">r-getopt</requirement> 8 <requirement type="package" version="1.20.0">r-getopt</requirement>
7 <requirement type="package" version="1.0.0">r-stringr</requirement> 9 <requirement type="package" version="1.0.0">r-stringr</requirement>
8 <requirement type="package" version="1.8.3">r-plyr</requirement> 10 <requirement type="package" version="1.8.3">r-plyr</requirement>
9 <requirement type="package" version="3.98">r-xml</requirement> 11 <requirement type="package" version="3.98">r-xml</requirement>
10 <requirement type="package" version="1.0_6">r-bitops</requirement> 12 <requirement type="package" version="1.0_6">r-bitops</requirement>
11 <requirement type="package" version="1.95">r-rcurl</requirement> 13 <requirement type="package" version="1.95">r-rcurl</requirement>
12 <requirement type="package" version="1.3">r-rjsonio</requirement> 14 <requirement type="package" version="1.1">r-jsonlite</requirement>
13 </requirements> 15 </requirements>
14 16
15 <code file="list-chrom-cols.py"/> 17 <code file="list-chrom-cols.py"/>
16 18 <code file="list-file-cols.py"/>
17 <!--~~~~~~~ 19 <code file="list-ms-mode-values.py"/>
18 ~ COMMAND ~ 20
19 ~~~~~~~~--> 21 <!--=======
22 = COMMAND =
23 ========-->
20 24
21 <command> 25 <command>
22 <![CDATA[ 26 <![CDATA[
23 ## @@@BEGIN_CHEETAH@@@ 27 ## @@@BEGIN_CHEETAH@@@
24 $__tool_directory__/search-mz -i "$mzrtinput" 28 $__tool_directory__/search-mz
29
30 ## Input file
31 -i "$mzrtinput"
32 --input-col-names "mz=$inputmzfield,rt=$inputrtfield"
33 --rtunit "$inputrtunit"
25 34
26 ## Database 35 ## Database
27 #if $db.dbtype == "inhouse" 36 #if $db.dbtype == "inhouse"
28 -d file 37 -d file
29 --db-fields "$db.dbfields" 38 --db-fields "mztheo=$db.dbmzreffield,chromcolrt=$db.dbchromcolrtfield,compoundid=$db.dbspectrumidfield,chromcol=$db.dbchromcolfield,msmode=$db.dbmsmodefield,peakattr=$db.dbpeakattrfield,pubchemcompid=$db.dbpubchemcompidfield,chebiid=$db.dbchebiidfield,hmdbid=$db.dbhmdbidfield,keggid=$db.dbkeggidfield"
30 --db-ms-modes "$db.dbmsmodes" 39 --db-ms-modes "pos=$db.dbmsposmode,neg=$db.dbmsnegmode"
40 --db-rt-unit $db.dbrtunit
31 #end if 41 #end if
32 #if $db.dbtype == "peakforest" 42 #if $db.dbtype == "peakforest"
33 -d peakforest 43 -d peakforest
34 --db-token "$db.dbtoken" 44 --db-token "$db.dbtoken"
35 #end if 45 #end if
55 -o "$mainoutput" --peak-output-file "$peaksoutput" --same-rows --same-cols 65 -o "$mainoutput" --peak-output-file "$peaksoutput" --same-rows --same-cols
56 66
57 ## HTML output 67 ## HTML output
58 --html-output-file "$htmloutput" --no-main-table-in-html-output 68 --html-output-file "$htmloutput" --no-main-table-in-html-output
59 69
60 ## Fields of input file
61 --input-col-names "$inputfields"
62
63 ## Output setting 70 ## Output setting
64 #if $out.enabled == "true" 71 --molids-sep "$molidssep"
65 --output-col-names "$out.outputfields"
66 --molids-sep "$out.molidssep"
67 #else
68 --molids-sep "|"
69 #end if
70 ## @@@END_CHEETAH@@@ 72 ## @@@END_CHEETAH@@@
71 ]]></command> 73 ]]></command>
72 74
73 <!--~~~~~~ 75 <!--======
74 ~ INPUTS ~ 76 = INPUTS =
75 ~~~~~~~--> 77 =======-->
76 78
77 <inputs> 79 <inputs>
78 80
79 <!-- DATABASE --> 81 <!-- DATABASE -->
80 82
88 <when value="inhouse"> 90 <when value="inhouse">
89 <!-- Database file --> 91 <!-- Database file -->
90 <param name="dburl" label="Database file" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. Retention time values must be in seconds."/> 92 <param name="dburl" label="Database file" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. Retention time values must be in seconds."/>
91 93
92 <!-- File database field names --> 94 <!-- File database field names -->
93 <param name="dbfields" label="File database column names" type="text" size="256" value="mztheo=mztheo,chromcolrt=chromcolrt,compoundid=compoundid,chromcol=chromcol,msmode=msmode,peakattr=peakattr,peakcomp=peakcomp,fullnames=fullnames,compoundmass=compoundmass,compoundcomp=compoundcomp,inchi=inchi,inchikey=inchikey,pubchemcompid=pubchemcompid,chebiid=chebiid,hmdbid=hmdbid,keggid=keggid" refresh_on_change="true" help=""/> 95 <param name="dbspectrumidfield" type="select" label="Database file Spectrum ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'spectrumid,accession,compoundid,molid')" help="Select the Spectrum ID column of the database file."/>
96 <param name="dbmzreffield" type="select" label="Database file Reference MZ column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'mztheo,mzexp,mz')" help="Select the Reference MZ column of the database file."/>
97 <param name="dbchromcolfield" type="select" label="Database file Chromatographic Column Name column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chromcol,col')" help="Select the Chromatographic Column Name column of the database file." refresh_on_change="true"/>
98 <param name="dbchromcolrtfield" type="select" label="Database file Chromatographic Column Retention Time column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chromcolrt,colrt,rt')" help="Select the Chromatographic Column Retention Time column of the database file."/>
99 <param name="dbmsmodefield" type="select" label="Database file MS Mode column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'msmode,mode')" help="Select the MS Mode column of the database file." refresh_on_change="true"/>
100 <param name="dbpeakattrfield" type="select" label="Database file Peak Attribution column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'peakattr,attr')" help="Select the Peak Attribution column of the database file."/>
101 <param name="dbpubchemcompidfield" type="select" label="Database file PubChem Compound ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'pubchemcompid,pubchemid,pubchemcomp,pubchem')" help="Select the PubChem Compound ID column of the database file."/>
102 <param name="dbchebiidfield" type="select" label="Database file ChEBI ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chebiid,chebi')" help="Select the ChEBI ID column of the database file."/>
103 <param name="dbhmdbidfield" type="select" label="Database file HMDB Metabolite ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'hmdbid,hmdb')" help="Select the HMDB Metabolite ID column of the database file."/>
104 <param name="dbkeggidfield" type="select" label="Database file KEGG Compound ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'keggid,kegg')" help="Select the KEGG Compound ID column of the database file."/>
94 105
95 <!-- File database MS modes --> 106 <!-- File database MS modes -->
96 <param name="dbmsmodes" label="File database MS modes" type="text" size="32" value="pos=POS,neg=NEG" help=""/> 107 <param name="dbmsposmode" label="File database MS Positive mode" type="select" dynamic_options="get_ms_mode_value(file = db['dburl'], col = db['dbmsmodefield'], preferred = 'POS,pos,+')" help="Select the value used to identify the positive MS mode."/>
108 <param name="dbmsnegmode" label="File database MS Negative mode" type="select" dynamic_options="get_ms_mode_value(file = db['dburl'], col = db['dbmsmodefield'], preferred = 'NEG,neg,-')" help="Select the value used to identify the negative MS mode."/>
109
110 <!-- File database RT unit -->
111 <param name="dbrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help="">
112 <option value="sec">Seconds</option>
113 <option value="min">Minutes</option>
114 </param>
97 115
98 <param name="dbtoken" type="text" size="32" value="" hidden="true"/> 116 <param name="dbtoken" type="text" size="32" value="" hidden="true"/>
99 </when> 117 </when>
100 118
101 <when value="peakforest"> 119 <when value="peakforest">
102 <param name="dburl" type="text" size="128" value="https://peakforest-alpha.inra.fr/rest" refresh_on_change="true"/> 120 <param name="dburl" type="text" size="128" value="https://peakforest-alpha.inra.fr/rest" refresh_on_change="true"/>
103 121
104 <param name="dbtoken" label="Peakforest security token" type="text" size="32" value="" refresh_on_change="true" help="If you do not have yet a Peakforest token, go to Peakforest website and request one from your account."/> 122 <param name="dbtoken" label="Peakforest security token" type="text" size="32" value="" refresh_on_change="true" help="If you do not have yet a Peakforest token, go to Peakforest website and request one from your account."/>
105 123
106 <param name="dbfields" type="text" size="32" value="" hidden="true"/> 124 <param name="dbchromcolfield" type="text" size="32" value="" hidden="true"/>
107 </when> 125 </when>
108 </conditional> 126 </conditional>
109 127
110 <!-- INPUT --> 128 <!-- INPUT -->
111 129
112 <!-- Input file --> 130 <!-- Input file -->
113 <param name="mzrtinput" label="Input file - MZ(/RT) values" type="data" format="tabular,tsv" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. RT values must be in seconds."/> 131 <param name="mzrtinput" label="Input file - MZ(/RT) values" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. RT values must be in seconds."/>
114 132
115 <!-- Input field names --> 133 <!-- Input file field names -->
116 <param name="inputfields" label="Input file column names" type="text" size="32" value="mz=mzmed,rt=rtmed" help=""/> 134 <param name="inputmzfield" type="select" label="Input file MZ column name" dynamic_options="get_file_cols(file = mzrtinput, preferred = 'mzmed,mz')" help="Select the MZ column of the input file."/>
135 <param name="inputrtfield" type="select" label="Input file RT column name" dynamic_options="get_file_cols(file = mzrtinput, preferred = 'rtmed,rt')" help="Select the RT column of the input file."/>
136
137 <!-- Input file RT unit -->
138 <param name="inputrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help="">
139 <option value="sec">Seconds</option>
140 <option value="min">Minutes</option>
141 </param>
117 142
118 <!-- M/Z MATCHING --> 143 <!-- M/Z MATCHING -->
119 144
120 <!-- Mode --> 145 <!-- Mode -->
121 <param name="mzmode" label="MS mode" type="select" display="radio" multiple="false" help=""> 146 <param name="mzmode" label="MS mode" type="select" display="radio" multiple="false" help="">
128 <param name="mzshift" label="M/Z shift (in ppm)" type="float" help="" value="0"/> 153 <param name="mzshift" label="M/Z shift (in ppm)" type="float" help="" value="0"/>
129 154
130 <!-- RETENTION TIME PARAMETERS --> 155 <!-- RETENTION TIME PARAMETERS -->
131 156
132 <!-- List of chromatographic columns --> 157 <!-- List of chromatographic columns -->
133 <param name="chromcols" type="select" label="Chromatographic columns" multiple="true" dynamic_options="get_chrom_cols(dbtype = db['dbtype'], dburl = db['dburl'], dbtoken = db['dbtoken'], dbfields = db['dbfields'])" help="Select here the set of chromatographic columns against which the retention time matching will be run."/> 158 <param name="chromcols" type="select" label="Chromatographic columns" multiple="true" dynamic_options="get_chrom_cols(dbtype = db['dbtype'], dburl = db['dburl'], dbtoken = db['dbtoken'], col_field = db['dbchromcolfield'])" help="Select here the set of chromatographic columns against which the retention time matching will be run."/>
134 159
135 <!-- Tolerances --> 160 <!-- Tolerances -->
136 <param name="tolx" label="RTX retention time tolerance, parameter x (in seconds)" type="float" help="" value="5"/> 161 <param name="tolx" label="RTX retention time tolerance, parameter x (in seconds)" type="float" help="" value="5"/>
137 <param name="toly" label="RTY retention time tolerance, parameter y" type="float" help="" value="0.8"/> 162 <param name="toly" label="RTY retention time tolerance, parameter y" type="float" help="" value="0.8"/>
138 <param name="tolz" label="RTZ retention time tolerance, used when precursor matching is enabled." type="float" help="" value="5"/> 163 <param name="tolz" label="RTZ retention time tolerance, used when precursor matching is enabled." type="float" help="" value="5"/>
172 </param> 197 </param>
173 </when> 198 </when>
174 </conditional> 199 </conditional>
175 200
176 <!-- OUTPUT --> 201 <!-- OUTPUT -->
177 <conditional name="out"> 202 <!-- Molecule IDs separator character -->
178 203 <param name="molidssep" label="Molecule IDs separator character" type="text" size="3" value="|" help="">
179 <param name="enabled" label="Output settings" type="select"> 204 <sanitizer>
180 <option value="false">Default</option> 205 <valid initial="string.printable">
181 <option value="true">Customized</option> 206 <remove value='"'/>
182 </param> 207 </valid>
183 208 <mapping initial="none">
184 <when value="false"></when> 209 <add source='"' target='\"'/>
185 <when value="true"> 210 </mapping>
186 211 </sanitizer>
187 <!-- Output field names --> 212 </param>
188 <param name="outputfields" label="Output column names" type="text" size="256" value="mz=mz,rt=rt,chromcol=chromcol,chromcolrt=chromcolrt,compoundid=compoundid,peakattr=peakattr,peakcomp=peakcomp,intensity=intensity,relative.intensity=relative.intensity,mzexp=mzexp,mztheo=mztheo,fullnames=fullnames,compoundmass=compoundmass,compoundcomp=compoundcomp,inchi=inchi,inchikey=inchikey,pubchemcompid=pubchemcompid,chebiid=chebiid,hmdbid=hmdbid,keggid=keggid" help=""/>
189
190 <!-- Molecule IDs separator character -->
191 <param name="molidssep" label="Molecule IDs separator character" type="text" size="3" value="|" help="">
192 <sanitizer>
193 <valid initial="string.printable">
194 <remove value='"'/>
195 </valid>
196 <mapping initial="none">
197 <add source='"' target='\"'/>
198 </mapping>
199 </sanitizer>
200 </param>
201 </when>
202 </conditional>
203 213
204 </inputs> 214 </inputs>
205 215
206 <!--~~~~~~~ 216 <!--=======
207 ~ OUTPUTS ~ 217 = OUTPUTS =
208 ~~~~~~~~--> 218 ========-->
209 219
210 <outputs> 220 <outputs>
211 221
212 <!-- Output file --> 222 <!-- Output file -->
213 <data name="mainoutput" label="lcmsmatch_${mzrtinput.name}" format="tabular"/> 223 <data name="mainoutput" label="lcmsmatch_${mzrtinput.name}" format="tabular"/>
214 <data name="peaksoutput" label="lcmsmatch_${mzrtinput.name}_peaks" format="tabular"/> 224 <data name="peaksoutput" label="lcmsmatch_${mzrtinput.name}_peaks" format="tabular"/>
215 <data name="htmloutput" label="lcmsmatch_${mzrtinput.name}.html" format="html"/> 225 <data name="htmloutput" label="lcmsmatch_${mzrtinput.name}.html" format="html"/>
216 226
217 </outputs> 227 </outputs>
218 228
219 <!--~~~~~ 229 <!--=====
220 ~ TESTS ~ 230 = TESTS =
221 ~~~~~~--> 231 ======-->
222 232
223 <tests> 233 <tests>
224 234
225 <!-- File database test --> 235 <!-- File database test -->
226 <test> 236 <test>
227 <param name="dbtype" value="inhouse"/> 237 <param name="dbtype" value="inhouse"/>
228 <param name="dburl" value="filedb.tsv"/> 238 <param name="dburl" value="filedb.tsv"/>
229 <param name="dbfields" value=""/> 239 <param name="dbfields" value=""/>
230 <param name="dbmsmodes" value=""/> 240 <param name="dbmsmodes" value=""/>
231 <param name="mzrtinput" value="mz-input-small.tsv"/> 241 <param name="mzrtinput" value="mz-input-small.tsv"/>
232 <param name="inputfields" value=""/> 242 <param name="inputmzfield" value="mzmed"/>
243 <param name="inputrtfield" value="rtmed"/>
233 <param name="mzmode" value="pos"/> 244 <param name="mzmode" value="pos"/>
234 <output name="mainoutput" file="filedb-small-mz-match-output.tsv"/> 245 <output name="mainoutput" file="filedb-small-mz-match-output.tsv"/>
235 <output name="peaksoutput" file="filedb-small-mz-match-peaks-output.tsv"/> 246 <output name="peaksoutput" file="filedb-small-mz-match-peaks-output.tsv"/>
236 <output name="htmloutput" file="filedb-small-mz-match-html-output.html"/> 247 <output name="htmloutput" file="filedb-small-mz-match-html-output.html"/>
237 </test> 248 </test>
251 </output> 262 </output>
252 </test> 263 </test>
253 --> 264 -->
254 </tests> 265 </tests>
255 266
256 <!--~~~~ 267 <!--====
257 ~ HELP ~ 268 = HELP =
258 ~~~~~--> 269 =====-->
259 270
260 <help> 271 <help>
261 <!-- @@@BEGIN_RST@@@ --> 272 <!-- @@@BEGIN_RST@@@ -->
262 273
263 ============== 274 ==============
270 Database 281 Database
271 -------- 282 --------
272 283
273 When selecting the database, you have the choice between a Peakforest database or an in-house file. 284 When selecting the database, you have the choice between a Peakforest database or an in-house file.
274 285
275 For the Peakforest database, a default REST web base address is already provided. But you can change it of you want to use a custom database. A field is also available for setting a token key in case the access to the Peakforest database you want to use is restricted. This is the case of the default database. 286 For the Peakforest database, a default REST web base address is already provided. But you can change it to use a custom database. A field is also available for setting a token key in case the access to the Peakforest database you want to use is restricted. This is the case of the default database URL.
276 287
277 For the in-house file, please refer to the paragraph "Single file database" below. 288 For the in-house file, please refer to the paragraph "Single file database" below.
278 289
279 ----------- 290 -----------
280 Input files 291 Input files
283 Be careful to always provide UTF-8 encoded files, unless you do not use special characters at all. For instance, greek letters in molecule names give errors if the file is in latin1 (ISO 8859-1) or Windows 1252 (not distinguishable from latin1) encoding. 294 Be careful to always provide UTF-8 encoded files, unless you do not use special characters at all. For instance, greek letters in molecule names give errors if the file is in latin1 (ISO 8859-1) or Windows 1252 (not distinguishable from latin1) encoding.
284 295
285 Single file database 296 Single file database
286 ==================== 297 ====================
287 298
288 The database used is provided as a single file, in tabular format, through the *Database file* field. This file contains a list of MS peaks, with retention times. 299 The database used is provided as a single file, in tabular format, through the *Database file* field. This file must contain a list of MS peaks, possibly with retention times.
289 Peaks are "duplicated" as much as necessary. For instance if 3 retention times are available on a compound with 10 peaks in positive mode, then there will be 30 lines for this compounds in positive mode. 300 Peaks are "duplicated" as much as necessary. For instance if 3 retention times are available on a compound with 10 peaks in positive mode, then there will be 30 lines for this compound in positive mode.
290 301
291 The file must contain a header with the column names. The names are free, but must be provided through the *File database column names* field. 302 The file must contain a header with the column names. The names are free, but must be provided through the different fields named *Database file ... column name*.
292 In this field, each column is identified with a tag, and the columns names are listed as a comma separated list of tag/name couples (separated by character `=`). The allowed tags are the following ones: 303 Then you must provide the values used to identify the MS modes (positive and negative).
293 304
294 +--------------+------------+------------------------------------------------------------------------------------------------------------+ 305 The last piece of information required for the single file database is the unit of the retention times, either seconds or minutes.
295 | Column tag | Compulsory | Values |
296 +==============+============+============================================================================================================+
297 | mztheo | Yes | The m/z values. |
298 +--------------+------------+------------------------------------------------------------------------------------------------------------+
299 | mode | Yes | The MS mode. |
300 +--------------+------------+------------------------------------------------------------------------------------------------------------+
301 | molid | Yes | This is the identifier of your compound. |
302 +--------------+------------+------------------------------------------------------------------------------------------------------------+
303 | colrt | No | The retention time values in seconds. |
304 +--------------+------------+------------------------------------------------------------------------------------------------------------+
305 | col | No | The chromatographic column associated with the retention time. Compulsory if retention times are provided. |
306 +--------------+------------+------------------------------------------------------------------------------------------------------------+
307 | attr | No | The attribution of the peak (e.g.: ``[(M+H)-(H2O)-(NH3)]+``). |
308 +--------------+------------+------------------------------------------------------------------------------------------------------------+
309 | comp | No | The composition of the peak (e.g.: ``C6 H10 N O``). |
310 +--------------+------------+------------------------------------------------------------------------------------------------------------+
311 | molcomp | No | The composition of the molecule. (e.g.: ``C6H14N2O2``). |
312 +--------------+------------+------------------------------------------------------------------------------------------------------------+
313 | molmass | No | The mass of the molecule. |
314 +--------------+------------+------------------------------------------------------------------------------------------------------------+
315 | molnames | No | The names of the molecule, as a semicolon separated list. |
316 +--------------+------------+------------------------------------------------------------------------------------------------------------+
317 | inchi | No | The InChI of the molecule. |
318 +--------------+------------+------------------------------------------------------------------------------------------------------------+
319 | inchikey | No | The InChI key of the molecule. |
320 +--------------+------------+------------------------------------------------------------------------------------------------------------+
321 | pubchem | No | The PubChem ID of the molecule. |
322 +--------------+------------+------------------------------------------------------------------------------------------------------------+
323 | chebi | No | The ChEBI ID of the molecule. |
324 +--------------+------------+------------------------------------------------------------------------------------------------------------+
325 | hmdb | No | The HMDB ID of the molecule. |
326 +--------------+------------+------------------------------------------------------------------------------------------------------------+
327 | kegg | No | The KEGG ID of the molecule. |
328 +--------------+------------+------------------------------------------------------------------------------------------------------------+
329
330 The field *File database MS modes* allows you to personalize the MS mode identifiers. The value of the field is a comma separated list of mode/name couples (separated by character `=`)..
331 For instance, if in your database file you use characters '+' and '-' to identify the modes, then you must set the field to `pos=+,neg=-`.
332 306
333 Example of database file (totally fake, no meaning): 307 Example of database file (totally fake, no meaning):
334 308
335 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ 309 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+
336 | molid | mode | mz | composition | attribution | col | rt | molcomp | molmass | molnames | 310 | molid | mode | mz | composition | attribution | col | rt | molcomp | molmass | molnames |
359 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ 333 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+
360 334
361 MZ/RT input file 335 MZ/RT input file
362 ================ 336 ================
363 337
364 The input to provide is a file, in a tabular format (or TSV: Tab Separated Values), containing the list of MZ/RT values. 338 The input to provide is a file, in a tabular format (or TSV: Tab Separated Values), containing the list of M/Z values, possibly with RT values as well.
365 339
366 The following columns will be used: 340 The column names for the M/Z and RT values must be provided through the fields *Input file MZ column name* and *Input file RT column name*.
367 341 As a consequence, the file must contain a header line.
368 +--------------+------------+---------------------------------------+ 342
369 | Column tag | Compulsory | Values | 343 The unit of the retention time has to be provided with the field *Retention time unit*.
370 +==============+============+=======================================+
371 | mz | Yes | The m/z values. |
372 +--------------+------------+---------------------------------------+
373 | rt | No | The retention time values in seconds. |
374 +--------------+------------+---------------------------------------+
375
376 The file may contain a header line, in which case you have to provide the column names through the *Input file column names* field, which consists in a comma separated list of tag/name couples (separated by character `=`). If your file does not contain a header line, then you must provide the column numbers. Examples:
377
378 * With a header line having name MASS for mz column and RET for rt column: `mz=MASS,rt=RET`.
379 * With no header line: `mz=1,rt=2`.
380
381 Since the MS spectrum mode can not be known from the file, an *MS mode* radio button field is provided for setting the mode.
382 344
383 Example of file input: 345 Example of file input:
384 346
385 +-------------+-------------+ 347 +-------------+-------------+
386 | mz | rt | 348 | mz | rt |
406 368
407 The first parameter is the MS mode, specified through the *MS mode* parameter. 369 The first parameter is the MS mode, specified through the *MS mode* parameter.
408 370
409 The parameters *M/Z precision* and *M/Z shift* are used by the algorithm in the following formula in order to match an *m/z* value: 371 The parameters *M/Z precision* and *M/Z shift* are used by the algorithm in the following formula in order to match an *m/z* value:
410 372
411 mz (1 + (- shift - precision) / 10^6) &lt; mztheo &lt; mz (1 + (- shift + precision) / 10^6) 373 mz (1 + (- shift - precision) / 10^6) &lt; mzref &lt; mz (1 + (- shift + precision) / 10^6)
412 374
413 Where *mztheo* is the theoretical mass of the database peak that is tested. If this double inequality is true, then the *m/z* value is matched with this peak. 375 Where *mzref* is the M/Z of reference from the database peak that is tested. If this double inequality is true, then the *m/z* value is matched with this peak.
414 376
415 -------------------- 377 --------------------
416 Retention time match 378 Retention time match
417 -------------------- 379 --------------------
418 380
419 If at least one column is checked inside the *Columns* parameter section, then retention time is also matched, in addition to the *m/z* value, according to the following formula: 381 If at least one column is selected inside the *Chromatographic columns* parameter section, then retention time is also matched, in addition to the *m/z* value, according to the following formula:
420 382
421 rt - x - rt^y &lt; colrt &lt; rt + x + rt^y 383 rt - x - rt^y &lt; colrt &lt; rt + x + rt^y
422 384
423 Where *x* is the value of the parameter *RTX* and *y* the value of the parameter *RTY*. 385 Where *x* is the value of the parameter *RTX* and *y* the value of the parameter *RTY*.
424 386
449 3. For each input couple (m/z,rt), we look at all peaks inside the molecules taken from step 2, whose matched retention time is between *rt - z* and *rt + z*, where *z* is the value of parameter *RTZ*. 411 3. For each input couple (m/z,rt), we look at all peaks inside the molecules taken from step 2, whose matched retention time is between *rt - z* and *rt + z*, where *z* is the value of parameter *RTZ*.
450 412
451 --------------- 413 ---------------
452 Output settings 414 Output settings
453 --------------- 415 ---------------
454
455 The *Output column names* parameter is used to customize the columns of the output files. As with the *File database column names* parameter, each column is identified with a tag, and the columns names are listed as a comma separated list of tag/name couples (separated by character `=`). The allowed tags are the following ones:
456
457 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
458 | Column tag | Values |
459 +==============+=================================================================================================================================+
460 | mz | The m/z values from the input file. |
461 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
462 | mztheo | The m/z values from the database. |
463 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
464 | molid | This is the identifier of your compound. |
465 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
466 | rt | The retention time values in seconds from the input file. |
467 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
468 | col | The chromatographic column associated with the retention time. |
469 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
470 | colrt | The retention time associated with the matched chromatographic column. |
471 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
472 | msmatching | The list IDs of matched molecules. IDs are separated by the character specified in the *Molecule IDs separator character* field |
473 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
474 | attr | The attribution of the peak (e.g.: ``[(M+H)-(H2O)-(NH3)]+``). |
475 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
476 | comp | The composition of the peak (e.g.: ``C6 H10 N O``). |
477 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
478 | molcomp | The composition of the molecule. (e.g.: ``C6H14N2O2``). |
479 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
480 | molmass | The mass of the molecule. |
481 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
482 | molnames | The names of the molecule, as a semicolon separated list. |
483 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
484 | inchi | The InChI of the molecule. |
485 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
486 | inchikey | The InChI key of the molecule. |
487 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
488 | pubchem | The PubChem ID of the molecule. |
489 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
490 | chebi | The ChEBI ID of the molecule. |
491 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
492 | hmdb | The HMDB ID of the molecule. |
493 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
494 | kegg | The KEGG ID of the molecule. |
495 +--------------+---------------------------------------------------------------------------------------------------------------------------------+
496 416
497 The *Molecule IDs separator character* is used to customize the character used to separate the molecule IDs of the **molid** column inside the *main* output file. 417 The *Molecule IDs separator character* is used to customize the character used to separate the molecule IDs of the **molid** column inside the *main* output file.
498 418
499 Output files 419 Output files
500 ============ 420 ============
538 R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org 458 R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org
539 459
540 <!-- @@@END_RST@@@ --> 460 <!-- @@@END_RST@@@ -->
541 </help> 461 </help>
542 462
543 <!--~~~~~~~~~ 463 <!--=========
544 ~ CITATIONS ~ 464 = CITATIONS =
545 ~~~~~~~~~~--> 465 ==========-->
546 466
547 <citations/> 467 <citations/>
548 468
549 </tool> 469 </tool>