comparison lcmsmatching.xml @ 6:f86fec07f392 draft default tip

planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author prog
date Fri, 22 Feb 2019 16:04:22 -0500
parents fb9c0409d85c
children
comparison
equal deleted inserted replaced
5:fb9c0409d85c 6:f86fec07f392
1 <tool id="lcmsmatching" name="LC/MS matching" version="3.3.1" profile="16.01"> 1 <!-- vi: se fdm=marker : -->
2 2 <tool id="lcmsmatching" name="LCMS matching" version="4.0.2" profile="18.05">
3 <description>Annotation of MS peaks using matching on a spectra database.</description> 3
4 4 <description>Annotation of LCMS peaks using matching on a in-house spectra database or on PeakForest spectra database.</description>
5
6 <!-- Requirements {{{1 -->
7 <!-- **************************************************************** -->
5 <requirements> 8 <requirements>
6 <!--<requirement type="package" version="3.3.3">r</requirement>--> 9 <requirement type="package" version="1.2.2">r-biodb</requirement>
7 <requirement type="package" version="7.0">readline</requirement> <!-- Try readline 7.0 --> 10 <requirement type="package" version="1.20.2">r-getopt</requirement>
8 <requirement type="package" version="1.20.0">r-getopt</requirement> 11 <requirement type="package" version="0.2_15">r-codetools</requirement> <!-- R_VERSION="0.2-15" IMPORTANT Do not remove, used by travis_install_deps.sh script. --> <!-- codetools package is needed because of the following error when running Galaxy on Travis-CI in planemo tests: "code for methods in class “HtmlWriter” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)". -->
9 <requirement type="package" version="1.0.0">r-stringr</requirement> 12
10 <requirement type="package" version="1.8.3">r-plyr</requirement>
11 <requirement type="package" version="3.98">r-xml</requirement>
12 <requirement type="package" version="1.0_6">r-bitops</requirement>
13 <requirement type="package" version="1.95">r-rcurl</requirement>
14 <requirement type="package" version="1.1">r-jsonlite</requirement>
15 </requirements> 13 </requirements>
16 14
17 <code file="list-chrom-cols.py"/> 15 <!-- Command {{{1 -->
18 <code file="list-file-cols.py"/> 16 <!-- **************************************************************** -->
19 <code file="list-ms-mode-values.py"/>
20
21 <!--=======
22 = COMMAND =
23 ========-->
24 17
25 <command> 18 <command>
26 <![CDATA[ 19 <![CDATA[
27 ## @@@BEGIN_CHEETAH@@@ 20 ## @@@BEGIN_CHEETAH@@@
28 $__tool_directory__/search-mz 21 $__tool_directory__/lcmsmatching
22
23 --log-to-stdout
29 24
30 ## Input file 25 ## Input file
31 -i "$mzrtinput" 26 -i "$mzrtinput"
32 --input-col-names "mz=$inputmzfield,rt=$inputrtfield" 27 --input-col-names "$inputfields"
33 --rtunit "$inputrtunit" 28 --rtunit "$inputrtunit"
34 29
35 ## Database 30 ## Database
36 #if $db.dbtype == "inhouse" 31 #if $db.dbtype == "inhouse"
37 -d file 32 -d file
38 --db-fields "mztheo=$db.dbmzreffield,chromcolrt=$db.dbchromcolrtfield,compoundid=$db.dbspectrumidfield,chromcol=$db.dbchromcolfield,msmode=$db.dbmsmodefield,peakattr=$db.dbpeakattrfield,pubchemcompid=$db.dbpubchemcompidfield,chebiid=$db.dbchebiidfield,hmdbid=$db.dbhmdbidfield,keggid=$db.dbkeggidfield" 33 --db-fields "$db.dbfields"
39 --db-ms-modes "pos=$db.dbmsposmode,neg=$db.dbmsnegmode" 34 --db-ms-modes "$db.dbmsmodes"
40 --db-rt-unit $db.dbrtunit 35 --db-rt-unit "$db.dbrtunit"
41 #end if 36 #end if
42 #if $db.dbtype == "peakforest" 37 #if $db.dbtype == "peakforest"
43 -d peakforest 38 -d peakforest
44 --db-token "$db.dbtoken" 39 --db-token "$db.dbtoken"
45 #end if 40 #end if
46 --url "$db.dburl" 41 --url "$db.dburl"
47 42
48 ## M/Z matching 43 ## M/Z matching
49 -m $mzmode -p $mzprec -s $mzshift 44 -m $mzmode -p $mzprec -s $mzshift -u $mztolunit
50 45
51 ## Precursor matching 46 ## Precursor matching
52 #if $prec.match == "true" 47 #if $prec.match == "true"
53 --precursor-match --pos-prec "$prec.pos" --neg-prec "$prec.neg" 48 --precursor-match --pos-prec "$prec.pos" --neg-prec "$prec.neg"
54 #end if 49 #end if
70 ## Ouput setting 65 ## Ouput setting
71 --molids-sep "$molidssep" 66 --molids-sep "$molidssep"
72 ## @@@END_CHEETAH@@@ 67 ## @@@END_CHEETAH@@@
73 ]]></command> 68 ]]></command>
74 69
75 <!--====== 70 <!-- Inputs {{{1 -->
76 = INPUTS = 71 <!-- **************************************************************** -->
77 =======-->
78 72
79 <inputs> 73 <inputs>
80 74
81 <!-- DATABASE --> 75 <!-- Database {{{2 -->
82 76 <!-- **************************************************************** -->
83 <conditional name="db"> 77 <conditional name="db">
84 78
85 <param name="dbtype" label="Database" type="select" refresh_on_change="true"> 79 <param name="dbtype" label="Database" type="select" refresh_on_change="true">
86 <option value="inhouse">In-house</option> 80 <option value="inhouse">In-house</option>
87 <option value="peakforest">Peakforest</option> 81 <option value="peakforest">Peakforest</option>
88 </param> 82 </param>
89 83
84 <!-- In-house database parameters {{{3 -->
85 <!-- **************************************************************** -->
90 <when value="inhouse"> 86 <when value="inhouse">
91 <!-- Database file --> 87 <!-- Database file -->
92 <param name="dburl" label="Database file" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. Retention time values must be in seconds."/> 88 <param name="dburl" label="Database file" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. Retention time values must be in seconds."/>
93 89
94 <!-- File database field names --> 90 <!-- File database field names -->
95 <param name="dbspectrumidfield" type="select" label="Database file Spectrum ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'spectrumid,accession,compoundid,molid')" help="Select the Spectrum ID column of the database file."/> 91 <param name="dbfields" label="Column names" type="text" size="256" value="mztheo=mztheo,chromcolrt=chromcolrt,compoundid=compoundid,chromcol=chromcol,msmode=msmode,peakattr=peakattr,pubchemcompid=pubchemcompid,chebiid=chebiid,hmdbid=hmdbid,keggid=keggid" help="The list of column names of your database in-house file, as a coma separated list of key/value pairs."/>
96 <param name="dbmzreffield" type="select" label="Database file Reference MZ column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'mztheo,mzexp,mz')" help="Select the Reference MZ column of the database file."/> 92
97 <param name="dbchromcolfield" type="select" label="Database file Chromatographic Column Name column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chromcol,col')" help="Select the Chromatographic Column Name column of the database file." refresh_on_change="true"/>
98 <param name="dbchromcolrtfield" type="select" label="Database file Chromatographic Column Retention Time column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chromcolrt,colrt,rt')" help="Select the Chromatographic Column Retention Time column of the database file."/>
99 <param name="dbmsmodefield" type="select" label="Database file MS Mode column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'msmode,mode')" help="Select the MS Mode column of the database file." refresh_on_change="true"/>
100 <param name="dbpeakattrfield" type="select" label="Database file Peak Attribution column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'peakattr,attr')" help="Select the Peak Attribution column of the database file."/>
101 <param name="dbpubchemcompidfield" type="select" label="Database file PubChem Compound ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'pubchemcompid,pubchemid,pubchemcomp,pubchem')" help="Select the PubChem Compound ID column of the database file."/>
102 <param name="dbchebiidfield" type="select" label="Database file ChEBI ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chebiid,chebi')" help="Select the ChEBI ID column of the database file."/>
103 <param name="dbhmdbidfield" type="select" label="Database file HMDB Metabolite ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'hmdbid,hmdb')" help="Select the HMDB Metabolite ID column of the database file."/>
104 <param name="dbkeggidfield" type="select" label="Database file KEGG Compound ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'keggid,kegg')" help="Select the KEGG Compound ID column of the database file."/>
105
106 <!-- File database MS modes --> 93 <!-- File database MS modes -->
107 <param name="dbmsposmode" label="File database MS Positive mode" type="select" dynamic_options="get_ms_mode_value(file = db['dburl'], col = db['dbmsmodefield'], preferred = 'POS,pos,+')" help="Select the value used to identify the positive MS mode."/> 94 <param name="dbmsmodes" label="MS modes" help="Values used for the file database MS modes, as a coma separated list of key/value pairs." type="text" size="64" value="pos=pos,neg=neg"/>
108 <param name="dbmsnegmode" label="File database MS Negative mode" type="select" dynamic_options="get_ms_mode_value(file = db['dburl'], col = db['dbmsmodefield'], preferred = 'NEG,neg,-')" help="Select the value used to identify the negitive MS mode."/>
109 95
110 <!-- File database RT unit --> 96 <!-- File database RT unit -->
111 <param name="dbrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help=""> 97 <param name="dbrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help="">
112 <option value="sec">Seconds</option> 98 <option value="sec">Seconds</option>
113 <option value="min">Minutes</option> 99 <option value="min">Minutes</option>
114 </param> 100 </param>
115 101
116 <param name="dbtoken" type="text" size="32" value="" hidden="true"/> 102 <param name="dbtoken" type="text" size="32" value="" hidden="true"/>
117 </when> 103 </when>
118 104
105 <!-- PeakForest database parameters {{{3 -->
106 <!-- **************************************************************** -->
119 <when value="peakforest"> 107 <when value="peakforest">
120 <param name="dburl" type="text" size="128" value="https://peakforest-alpha.inra.fr/rest" refresh_on_change="true"/> 108 <param name="dburl" type="text" size="128" value="https://metabohub.peakforest.org/rest/" refresh_on_change="true"/>
121 109
122 <param name="dbtoken" label="Peakforest security token" type="text" size="32" value="" refresh_on_change="true" help="If you do not have yet a Peakforest token, go to Peakforest website and request one from your account."/> 110 <param name="dbtoken" label="Peakforest security token" type="text" size="32" value="" refresh_on_change="true" help="If you do not have yet a Peakforest token, go to Peakforest website and request one from your account."/>
123 111
124 <param name="dbchromcolfield" type="text" size="32" value="" hidden="true"/> 112 <param name="dbchromcolfield" type="text" size="32" value="" hidden="true"/>
125 </when> 113 </when>
126 </conditional> 114 </conditional>
127 115
128 <!-- INPUT --> 116 <!-- Input file {{{2 -->
129 117 <!-- **************************************************************** -->
130 <!-- Input file --> 118
131 <param name="mzrtinput" label="Input file - MZ(/RT) values" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. RT values must be in seconds."/> 119 <!-- Input file -->
132 120 <param name="mzrtinput" label="Input file - MZ(/RT) values" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. RT values must be in seconds."/>
133 <!-- Input field field names --> 121
134 <param name="inputmzfield" type="select" label="Input file MZ column name" dynamic_options="get_file_cols(file = mzrtinput, preferred = 'mzmed,mz')" help="Select the MZ column of the input file."/> 122 <!-- Input field field names -->
135 <param name="inputrtfield" type="select" label="Input file RT column name" dynamic_options="get_file_cols(file = mzrtinput, preferred = 'rtmed,rt')" help="Select the RT column of the input file."/> 123 <param name="inputfields" type="text" label="Input column names" size="64" help="Input file column names, as a coma separated list of key/value pairs." value="mz=mz,rt=rt"/>
136 124
137 <!-- Input file RT unit --> 125 <!-- Input file RT unit -->
138 <param name="inputrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help=""> 126 <param name="inputrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help="">
139 <option value="sec">Seconds</option> 127 <option value="sec">Seconds</option>
140 <option value="min">Minutes</option> 128 <option value="min">Minutes</option>
141 </param> 129 </param>
142 130
143 <!-- M/Z MATCHING --> 131 <!-- M/Z matching {{{2 -->
144 132 <!-- **************************************************************** -->
145 <!-- Mode --> 133
146 <param name="mzmode" label="MS mode" type="select" display="radio" multiple="false" help=""> 134 <!-- Mode -->
147 <option value="pos">Positive</option> 135 <param name="mzmode" label="MS mode" type="select" display="radio" multiple="false" help="">
148 <option value="neg">Negative</option> 136 <option value="pos">Positive</option>
149 </param> 137 <option value="neg">Negative</option>
150 138 </param>
151 <!-- MZ matching parameters --> 139
152 <param name="mzprec" label="M/Z precision (in ppm)" type="float" help="" value="5"/> 140 <!-- MZ matching parameters -->
153 <param name="mzshift" label="M/Z shift (in ppm)" type="float" help="" value="0"/> 141 <param name="mzprec" label="M/Z precision" type="float" help="" value="5"/>
154 142 <param name="mzshift" label="M/Z shift" type="float" help="" value="0"/>
155 <!-- RETENTION TIME PARAMETERS --> 143 <param name="mztolunit" label="M/Z tolerance unit" type="select" display="radio" multiple="false" help="">
156 144 <option value="ppm">PPM</option>
157 <!-- List of chromatographic columns --> 145 <option value="plain">Plain</option>
158 <param name="chromcols" type="select" label="Chromatographic columns" multiple="true" dynamic_options="get_chrom_cols(dbtype = db['dbtype'], dburl = db['dburl'], dbtoken = db['dbtoken'], col_field = db['dbchromcolfield'])" help="Select here the set of chromatographic columns against which the retention time matching will be run."/> 146 </param>
159 147
160 <!-- Tolerances --> 148 <!-- RT matching {{{2 -->
161 <param name="tolx" label="RTX retention time tolerance, parameter x (in seconds)" type="float" help="" value="5"/> 149 <!-- **************************************************************** -->
162 <param name="toly" label="RTY retention time tolerance, parameter y" type="float" help="" value="0.8"/> 150
163 <param name="tolz" label="RTZ retention time tolerance, used when precursor matching is enabled." type="float" help="" value="5"/> 151 <!-- List of chromatographic columns -->
164 152 <param name="chromcols" type="text" label="Chromatographic columns" size="2048" value=""/>
165 <!-- PRECURSOR MATCH --> 153
154 <!-- Tolerances -->
155 <param name="tolx" label="RTX" help="The retention time tolerance X parameter (in seconds)." type="float" value="5"/>
156 <param name="toly" label="RTY" help="The retention time tolerance Y parameter (no unit)." type="float" value="0.8"/>
157 <param name="tolz" label="RTZ" help="The retention time tolerance used when precursor matching is enabled." type="float" value="5"/>
158
159 <!-- Precursor matching {{{2 -->
160 <!-- **************************************************************** -->
166 <conditional name="prec"> 161 <conditional name="prec">
167 162
168 <param name="match" label="Precursor match" type="select"> 163 <param name="match" label="Precursor match" type="select">
169 <option value="false">Off</option> 164 <option value="false">Off</option>
170 <option value="true">On</option> 165 <option value="true">On</option>
196 </sanitizer> 191 </sanitizer>
197 </param> 192 </param>
198 </when> 193 </when>
199 </conditional> 194 </conditional>
200 195
201 <!-- OUTPUT --> 196 <!-- Output format {{{2 -->
197 <!-- **************************************************************** -->
198
202 <!-- Molecule IDs separator character --> 199 <!-- Molecule IDs separator character -->
203 <param name="molidssep" label="Molecule IDs separator character" type="text" size="3" value="|" help=""> 200 <param name="molidssep" label="Multiple matches separator character" type="text" size="3" value="|" help="">
204 <sanitizer> 201 <sanitizer>
205 <valid initial="string.printable"> 202 <valid initial="string.printable">
206 <remove value='"'/> 203 <remove value='"'/>
207 </valid> 204 </valid>
208 <mapping initial="none"> 205 <mapping initial="none">
211 </sanitizer> 208 </sanitizer>
212 </param> 209 </param>
213 210
214 </inputs> 211 </inputs>
215 212
216 <!--======= 213 <!-- Outputs {{{1 -->
217 = OUTPUTS = 214 <!-- **************************************************************** -->
218 ========-->
219 215
220 <outputs> 216 <outputs>
221 217
222 <!-- Output file -->
223 <data name="mainoutput" label="lcmsmatch_${mzrtinput.name}" format="tabular"/> 218 <data name="mainoutput" label="lcmsmatch_${mzrtinput.name}" format="tabular"/>
224 <data name="peaksoutput" label="lcmsmatch_${mzrtinput.name}_peaks" format="tabular"/> 219 <data name="peaksoutput" label="lcmsmatch_${mzrtinput.name}_peaks" format="tabular"/>
225 <data name="htmloutput" label="lcmsmatch_${mzrtinput.name}.html" format="html"/> 220 <data name="htmloutput" label="lcmsmatch_${mzrtinput.name}.html" format="html"/>
226 221
227 </outputs> 222 </outputs>
228 223
229 <!--===== 224 <!-- Tests {{{1 -->
230 = TESTS = 225 <!-- **************************************************************** -->
231 ======-->
232 226
233 <tests> 227 <tests>
234 228
235 <!-- File database test --> 229 <!-- Test 1, MZ only {{{2 -->
230 <!-- **************************************************************** -->
236 <test> 231 <test>
237 <param name="dbtype" value="inhouse"/> 232 <param name="dbtype" value="inhouse"/>
238 <param name="dburl" value="filedb.tsv"/> 233 <param name="dburl" value="filedb.tsv"/>
239 <param name="dbfields" value=""/>
240 <param name="dbmsmodes" value=""/>
241 <param name="mzrtinput" value="mz-input-small.tsv"/> 234 <param name="mzrtinput" value="mz-input-small.tsv"/>
242 <param name="inputmzfield" value="mzmed"/> 235 <param name="inputfields" value="mz=mz"/>
243 <param name="inputrtfield" value="rtmed"/>
244 <param name="mzmode" value="pos"/> 236 <param name="mzmode" value="pos"/>
245 <output name="mainoutput" file="filedb-small-mz-match-output.tsv"/> 237 <output name="mainoutput" file="test_1_main_output.tsv"/>
246 <output name="peaksoutput" file="filedb-small-mz-match-peaks-output.tsv"/> 238 <output name="peaksoutput" file="test_1_peaks_output.tsv"/>
247 <output name="htmloutput" file="filedb-small-mz-match-html-output.html"/> 239 <output name="htmloutput" file="test_1_peaks_output.html"/>
248 </test> 240 </test>
249 241
250 <!-- File database test --> 242 <!-- Test 2, MZ & RT {{{2 -->
251 <!-- 243 <!-- **************************************************************** -->
252 <test> 244 <test>
253 <param name="dbtype" value="peakforest"/> 245 <param name="dbtype" value="inhouse"/>
254 <param name="dbtoken" value="@PEAKFOREST_TOKEN@"/> 246 <param name="dburl" value="filedb.tsv"/>
255 <param name="mzrtinput" value="mz-input-small.tsv"/> 247 <param name="mzrtinput" value="mzrt-input-small.tsv"/>
256 <param name="inputfields" value=""/> 248 <param name="inputfields" value="mz=mz,rt=rt"/>
257 <param name="mzmode" value="pos"/> 249 <param name="mzmode" value="pos"/>
258 <output name="mainoutput"> 250 <param name="dbrtunit" value="min"/>
259 <assert_contents> 251 <param name="chromcols" value="col12"/>
260 <has_text text="mz"/> 252 <param name="tolx" value="5"/>
261 </assert_contents> 253 <param name="toly" value="0.8"/>
262 </output> 254 <output name="mainoutput" file="test_2_main_output.tsv"/>
255 <output name="peaksoutput" file="test_2_peaks_output.tsv"/>
256 <output name="htmloutput" file="test_2_peaks_output.html"/>
263 </test> 257 </test>
264 --> 258
259 <!-- Test 3, MZ & RT with precursor match {{{2 -->
260 <!-- **************************************************************** -->
261 <test>
262 <param name="dbtype" value="inhouse"/>
263 <param name="dburl" value="filedb.tsv"/>
264 <param name="mzrtinput" value="mzrt-input-small.tsv"/>
265 <param name="inputfields" value="mz=mz,rt=rt"/>
266 <param name="mzmode" value="pos"/>
267 <param name="dbrtunit" value="min"/>
268 <param name="chromcols" value="col12"/>
269 <param name="tolx" value="5"/>
270 <param name="toly" value="0.8"/>
271 <param name="match" value="true"/>
272 <param name="neg" value="[(M-H)]-,[M-H]-"/>
273 <param name="pos" value="[(M+H)]+,[M+H]+"/>
274 <param name="tolz" value="60"/>
275 <output name="mainoutput" file="test_3_main_output.tsv"/>
276 <output name="peaksoutput" file="test_3_peaks_output.tsv"/>
277 <output name="htmloutput" file="test_3_peaks_output.html"/>
278 </test>
279
265 </tests> 280 </tests>
266 281
267 <!--==== 282 <!-- Help {{{1 -->
268 = HELP = 283 <!-- **************************************************************** -->
269 =====-->
270 284
271 <help> 285 <help>
272 <!-- @@@BEGIN_RST@@@ --> 286 <!-- @@@BEGIN_RST@@@ -->
273 287
274 ============== 288 ==============
294 Be careful to always provide UTF-8 encoded files, unless you do not use special characters at all. For instance, greek letters in molecule names give errors if the file is in latin1 (ISO 8859-1) or Windows 1252 (not distinguishable from latin1) encoding. 308 Be careful to always provide UTF-8 encoded files, unless you do not use special characters at all. For instance, greek letters in molecule names give errors if the file is in latin1 (ISO 8859-1) or Windows 1252 (not distinguishable from latin1) encoding.
295 309
296 Single file database 310 Single file database
297 ==================== 311 ====================
298 312
299 The database used is provided as a single file, in tabular format, through the *Database file* field. This file must contain a list of MS peaks, with possibly retention times. 313 In this case, the database used is provided as a single file by the user, in tabular format, through the *Database file* field. This file must contain a list of MS peaks, with possibly retention times.
300 Peaks are "duplicated" as much as necessary. For instance if 3 retention times are available on a compound with 10 peaks in positive mode, then there will be 30 lines for this compound in positive mode. 314 Peaks are "duplicated" as much as necessary. For instance if 3 retention times are available on a compound with 10 peaks in positive mode, then there will be 30 lines for this compound in positive mode.
301 315
302 The file must contain a header with the column names. The names are free, but must be provided through the different fields named *Database file ... column name*. 316 The file must contain a header with the column names. The names are free, but must be provided through the *Column names* field as a comma separated list of key/value pairs. See the default value for an example. Of course it is much easier if your database file uses the default column names used in the default value of the *Column names* field. The column names shown in the default value are only the ones used by the algorithm. You can provide any additional columns in your database file; they will be copied to the output.
303 Then you must provide the values used to identify the MS modes (positive and negative). 317
304 318 Then you must provide the values used to identify the MS modes (positive and negative), using field *MS modes*.
305 The last piece of information needed about the single file database is the unit of the retention times, either in seconds or in minutes. 319
320 The last piece of information needed about the single file database is the unit of the retention times, either in seconds or in minutes. Use the field "Retention time unit" to provide this information.
306 321
307 Example of database file (totally fake, no meaning): 322 Example of database file (totally fake, no meaning):
308 323
309 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ 324 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+
310 | molid | mode | mz | composition | attribution | col | rt | molcomp | molmass | molnames | 325 | molid | mode | mz | composition | attribution | col | rt | molcomp | molmass | molnames |
329 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ 344 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+
330 | A10 | "POS" | 72.080775 | "P9Z4W410 O0" | "[(M+H)-(J15L2M6O2)]+" | "colpp" | 0.89 | "J114L6M62O2" | 146.10553 | "Blablaine" | 345 | A10 | "POS" | 72.080775 | "P9Z4W410 O0" | "[(M+H)-(J15L2M6O2)]+" | "colpp" | 0.89 | "J114L6M62O2" | 146.10553 | "Blablaine" |
331 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ 346 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+
332 | A10 | "POS" | 145.097154 | "P92Z6W413 O2" | "[(M+H)-(H2)]+" | "hcoltt" | 0.8 | "J114L6M62O2" | 146.10553 | "Blablaine" | 347 | A10 | "POS" | 145.097154 | "P92Z6W413 O2" | "[(M+H)-(H2)]+" | "hcoltt" | 0.8 | "J114L6M62O2" | 146.10553 | "Blablaine" |
333 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ 348 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+
349
350 The corresponding value of the *Column names* field for this database file would be:
351 **mztheo=mz,chromcolrt=rt,compoundid=molid,chromcol=col,msmode=mode,peakattr=attribution**.
352
353 And the value of the *MS modes* field would be: **pos=POS,neg=NEG**.
334 354
335 MZ/RT input file 355 MZ/RT input file
336 ================ 356 ================
337 357
338 The input to provide is a file, in a tabular format (or TSV: Tab Separated Values), containing the list of M/Z values, with possibly also RT values. 358 The input to provide is a dataset in a tabular format (or TSV: Tab Separated Values), containing the list of M/Z values, with possibly also RT values. The dataset is chosen through the field *Input file - MZ(/RT) values*.
339 359
340 The column names for the M/Z and RT values must be provided through the fields *Input file MZ column name* and *Input file RT column name*. 360 The column names for the M/Z and RT values must be provided through the field *Input column names*, as a comma separated list of key/value pairs.
341 As a consequence, the file must contain a header line. 361 The file/dataset must contain a header line with the same names specified in the field *Input column names*.
342 362
343 The unit of the retention time has to be provided with the field *Retention time unit*. 363 The unit of the retention time has to be provided with the field *Retention time unit*.
344 364
345 Example of file input: 365 Example of file input:
346 366
362 382
363 ------------ 383 ------------
364 M/Z matching 384 M/Z matching
365 ------------ 385 ------------
366 386
367 In the simplest form of the algorithm only the *m/z* values are matched against the database peaks. This happens if both *Retention time match* and *Precursor match* are off. 387 In the simplest form of the algorithm only the *M/Z* values are matched against the database peaks. This happens if both *Retention time match* and *Precursor match* are off.
368 388
369 The first parameter is the MS mode, specified through the *MS mode* parameter. 389 The first parameter is the MS mode, specified through the *MS mode* parameter.
370 390
371 The parameters *M/Z precision* and *M/Z shift* are used by the algorithm in the following formula in order to match an *m/z* value: 391 The parameters *M/Z precision* and *M/Z shift* are used by the algorithm in the following formula in order to match an *M/Z* value:
372 392
373 mz (1 + (- shift - precision) / 10^6) &lt; mzref &lt; mz (1 + (- shift - precision) / 10^6) 393 mz - shift - precision &lt; mzref &lt; mz - shift + precision
374 394
375 Where *mzref* is the M/Z of reference from the database peak that is tested. If this double inequality is true, then the *m/z* value is matched with this peak. 395 Where *mzref* is the M/Z of reference from the database peak that is tested. If this double inequality is true, then the *M/Z* value is matched with this peak.
396
397 The parameters *shift* and *precision* can be input in either PPM values of M/Z or in plain values. Use the field *M/Z tolerance unit* to set the unit.
376 398
377 -------------------- 399 --------------------
378 Retention time match 400 Retention time match
379 -------------------- 401 --------------------
380 402
381 If at least one column is selected inside the *Chromatographic columns* parameter section, then retention time is also matched, in addition to the *m/z* value, according to the following formula: 403 If at least one column is selected inside the *Chromatographic columns* parameter section, then retention time is also matched, in addition to the *M/Z* value, according to the following formula:
382 404
383 rt - x - rt^y &lt; colrt &lt; rt + x + rt^y 405 rt - x - rt^y &lt; colrt &lt; rt + x + rt^y
384 406
385 Where *x* is the value of the parameter *RTX* and *y* the value of the parameter *RTY*. 407 Where *x* is the value of the parameter *RTX* and *y* the value of the parameter *RTY*.
386 408
387 If for a reference compound the database does not contain retention time for at least one of the specified columns, then only the *m/z* value is matched against the peaks of the reference compound. This means that in the results you can find compounds that do not match the provided retention time value. 409 If for a reference compound the database does not contain retention time for at least one of the specified columns, then only the *M/Z* value is matched against the peaks of the reference compound. This means that in the results you can find compounds that do not match the provided retention time value.
388 410
389 The *RTZ* parameter is used in the *Precursor match* algorithm (see below). 411 The *RTZ* parameter is used in the *Precursor match* algorithm (see below).
390 412
391 --------------- 413 ---------------
392 Precursor match 414 Precursor match
393 --------------- 415 ---------------
394 416
395 If the "Precursor match" option is enabled inside the parameters section, then a more sophisticated version of the algorithm, which is executed in two steps, is used. 417 If the "Precursor match" option is enabled inside the parameters section, then a more sophisticated version of the algorithm, which is executed in two steps, is used.
396 418
397 This algorithm takes two more parameters, one for each MS mode. These are the lists of precursors. Since the matching is run for one MS mode only, only one of the two parameters is used. Inside the single file database, all the peaks whose **attr** column value is equal to one of the precursors listed in *List of negative precursors* or *List of positive precursors*, depending on the mode, are considered as precursor peaks. 419 This algorithm takes two more parameters, one for each MS mode. These are the lists of precursors. Since the matching is run for one MS mode only, only one of the two parameters is used. Inside the single file database, all the peaks whose **peakattr** column value is equal to one of the precursors listed in *List of negative precursors* or *List of positive precursors*, depending on the mode, are considered as precursor peaks.
398 420
399 M/Z matching using precursor matching 421 M/Z matching using precursor matching
400 ===================================== 422 =====================================
401 423
402 1. Using the normal M/Z matching algorithm described above, we first look only for precursor peaks ([(M+H)]+, [(M+Na)]+, [(M+Cl)]-, ...). 424 1. Using the normal M/Z matching algorithm described above, we first look only for precursor peaks ([(M+H)]+, [(M+Na)]+, [(M+Cl)]-, ...).
412 434
413 --------------- 435 ---------------
414 Output settings 436 Output settings
415 --------------- 437 ---------------
416 438
417 The *Molecule IDs separator character* is used to customize the character used to separate the molecule IDs of the **molid** column inside the *main* output file. 439 The *Multiple matches separator character* is used to customize the character used to separate the multiple values inside each row in the *main* output dataset. The *main* output contains as many rows as the MZ/RT input dataset, thus when for one MZ/RT value the algorithm finds more than one match, it concatenates the matches using this separator character.
418 440
419 Output files 441 Output files
420 ============ 442 ============
421 443
422 Three files are output by the tool. 444 Three files are output by the tool.
423 445
424 +-------------+--------------------------------------+--------------------------------------------------------+ 446 +-------------+--------------------------------------+--------------------------------------------------------+
425 | Outputs | File name | Description | 447 | Outputs | File name | Description |
426 +-------------+--------------------------------------+--------------------------------------------------------+ 448 +-------------+--------------------------------------+--------------------------------------------------------+
427 | Main output | lcmsmatching_{input_file_name} | Contains the list of compounds that have been matched. | 449 | Main output | lcmsmatching_{input_file_name} | Contains the same data as the input dataset, with |
450 | | | match result included on each row. If more than one |
451 | | | match is found for a row, the different values of the |
452 | | | match are concatenated using the provided separator |
453 | | | character. |
428 +-------------+--------------------------------------+--------------------------------------------------------+ 454 +-------------+--------------------------------------+--------------------------------------------------------+
429 | Peak list | lcmsmatching_peaks_{input_file_name} | Contains all matched database peaks. | 455 | Peak list | lcmsmatching_{input_file_name}_peaks | Contains the same data as the input dataset, with |
456 | | | match result included on each row. If more than one |
457 | | | match is found for a row, then the row is duplicated. |
458 | | | Hence there is either no match for a row, or one |
459 | | | single match. |
430 +-------------+--------------------------------------+--------------------------------------------------------+ 460 +-------------+--------------------------------------+--------------------------------------------------------+
431 | HTML output | lcmsmatching_{input_file_name}.html | Contains the two tables on one page. | 461 | HTML output | lcmsmatching_{input_file_name}.html | Contains the same table as *Peak list* but in HTML |
462 | | | format and with links to external databases if columns |
463 | | | for PubChem Compound, ChEBI, HMDB Metabolites or KEGG |
464 | | | Compounds are provided. |
432 +-------------+--------------------------------------+--------------------------------------------------------+ 465 +-------------+--------------------------------------+--------------------------------------------------------+
433 466
434 The **main** output is identical to the input file, to which is added an *msmatching* column. This column contains a list of IDs of the compounds that have been matched for this couple of (m/z, rt) values. 467 The match results are output as new columns appended to the columns provided inside the MZ/RT input dataset, and prefixed with "lcmsmatching.".
435
436 The **peak list** output contains all database peaks that have been matched, for each (m/z, rt) input couple. Thus for each (m/z, rt) couple, there will be zero, one or more matched peaks output. The columns output are *mz*, *rt*, *id*, *mztheo*, *col*, *colrt*, *attribution* and *composition*, where *id* is the compound ID, *mztheo* is the theoretical mass of the fragment, *col* is the matched column and *colrt* is the retention time measured on the column for the reference compound.
437
438 The **HTML** output contains the peak table with links toward HMDB, KEGG, ChEBI and PubChem public databases, when IDs are available.
439 468
440 ===== 469 =====
441 About 470 About
442 ===== 471 =====
443 472
453 Data and algorithms have been kindly provided by Christophe Junot at *DSV/IBITEC-S/SPI* (*CEA/Saclay*), from a former application developed by Cyrille Petat and Arnaud Martel at *DSV/IBITEC-S/DIR* (*CEA/Saclay*). 482 Data and algorithms have been kindly provided by Christophe Junot at *DSV/IBITEC-S/SPI* (*CEA/Saclay*), from a former application developed by Cyrille Petat and Arnaud Martel at *DSV/IBITEC-S/DIR* (*CEA/Saclay*).
454 483
455 .. class:: infomark 484 .. class:: infomark
456 485
457 **Please cite** 486 **Please cite**
458 R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org 487 R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org.
488
489 ==============
490 Changelog/News
491 ==============
492
493 **Version 4.0.0 - 02/01/2019**
494
495 - NEW: Use of R biodb library. Connection to databases and matching have been moved to biodb library, which is maintained separately at http://github.com/pkrog/biodb.
459 496
460 <!-- @@@END_RST@@@ --> 497 <!-- @@@END_RST@@@ -->
461 </help> 498 </help>
462 499
463 <!--========= 500 <!-- Citations {{{1 -->
464 = CITATIONS = 501 <!-- **************************************************************** -->
465 ==========--> 502
466 503 <citations>
467 <citations/> 504 <citation type="bibtex">@unpublished{FGiacomoni2017,
505 title = {PeakForest [Internet], a spectral data portal for Metabolomics community - storing, curating and annotation services for metabolic profiles of biological matrix.},
506 author = {Franck Giacomoni and Nils Paulhe},
507 institution = {INRA / MetaboHUB},
508 year = {2017},
509 note = {Unpublished paper, available from: https://peakforest.org/.}
510 }</citation>
511 </citations>
468 512
469 </tool> 513 </tool>