Mercurial > repos > prog > lcmsmatching
comparison lcmsmatching.xml @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author | prog |
---|---|
date | Fri, 22 Feb 2019 16:04:22 -0500 |
parents | fb9c0409d85c |
children |
comparison
equal
deleted
inserted
replaced
5:fb9c0409d85c | 6:f86fec07f392 |
---|---|
1 <tool id="lcmsmatching" name="LC/MS matching" version="3.3.1" profile="16.01"> | 1 <!-- vi: se fdm=marker : --> |
2 | 2 <tool id="lcmsmatching" name="LCMS matching" version="4.0.2" profile="18.05"> |
3 <description>Annotation of MS peaks using matching on a spectra database.</description> | 3 |
4 | 4 <description>Annotation of LCMS peaks using matching on a in-house spectra database or on PeakForest spectra database.</description> |
5 | |
6 <!-- Requirements {{{1 --> | |
7 <!-- **************************************************************** --> | |
5 <requirements> | 8 <requirements> |
6 <!--<requirement type="package" version="3.3.3">r</requirement>--> | 9 <requirement type="package" version="1.2.2">r-biodb</requirement> |
7 <requirement type="package" version="7.0">readline</requirement> <!-- Try readline 7.0 --> | 10 <requirement type="package" version="1.20.2">r-getopt</requirement> |
8 <requirement type="package" version="1.20.0">r-getopt</requirement> | 11 <requirement type="package" version="0.2_15">r-codetools</requirement> <!-- R_VERSION="0.2-15" IMPORTANT Do not remove, used by travis_install_deps.sh script. --> <!-- codetools package is needed because of the following error when running Galaxy on Travis-CI in planemo tests: "code for methods in class “HtmlWriter” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)". --> |
9 <requirement type="package" version="1.0.0">r-stringr</requirement> | 12 |
10 <requirement type="package" version="1.8.3">r-plyr</requirement> | |
11 <requirement type="package" version="3.98">r-xml</requirement> | |
12 <requirement type="package" version="1.0_6">r-bitops</requirement> | |
13 <requirement type="package" version="1.95">r-rcurl</requirement> | |
14 <requirement type="package" version="1.1">r-jsonlite</requirement> | |
15 </requirements> | 13 </requirements> |
16 | 14 |
17 <code file="list-chrom-cols.py"/> | 15 <!-- Command {{{1 --> |
18 <code file="list-file-cols.py"/> | 16 <!-- **************************************************************** --> |
19 <code file="list-ms-mode-values.py"/> | |
20 | |
21 <!--======= | |
22 = COMMAND = | |
23 ========--> | |
24 | 17 |
25 <command> | 18 <command> |
26 <![CDATA[ | 19 <![CDATA[ |
27 ## @@@BEGIN_CHEETAH@@@ | 20 ## @@@BEGIN_CHEETAH@@@ |
28 $__tool_directory__/search-mz | 21 $__tool_directory__/lcmsmatching |
22 | |
23 --log-to-stdout | |
29 | 24 |
30 ## Input file | 25 ## Input file |
31 -i "$mzrtinput" | 26 -i "$mzrtinput" |
32 --input-col-names "mz=$inputmzfield,rt=$inputrtfield" | 27 --input-col-names "$inputfields" |
33 --rtunit "$inputrtunit" | 28 --rtunit "$inputrtunit" |
34 | 29 |
35 ## Database | 30 ## Database |
36 #if $db.dbtype == "inhouse" | 31 #if $db.dbtype == "inhouse" |
37 -d file | 32 -d file |
38 --db-fields "mztheo=$db.dbmzreffield,chromcolrt=$db.dbchromcolrtfield,compoundid=$db.dbspectrumidfield,chromcol=$db.dbchromcolfield,msmode=$db.dbmsmodefield,peakattr=$db.dbpeakattrfield,pubchemcompid=$db.dbpubchemcompidfield,chebiid=$db.dbchebiidfield,hmdbid=$db.dbhmdbidfield,keggid=$db.dbkeggidfield" | 33 --db-fields "$db.dbfields" |
39 --db-ms-modes "pos=$db.dbmsposmode,neg=$db.dbmsnegmode" | 34 --db-ms-modes "$db.dbmsmodes" |
40 --db-rt-unit $db.dbrtunit | 35 --db-rt-unit "$db.dbrtunit" |
41 #end if | 36 #end if |
42 #if $db.dbtype == "peakforest" | 37 #if $db.dbtype == "peakforest" |
43 -d peakforest | 38 -d peakforest |
44 --db-token "$db.dbtoken" | 39 --db-token "$db.dbtoken" |
45 #end if | 40 #end if |
46 --url "$db.dburl" | 41 --url "$db.dburl" |
47 | 42 |
48 ## M/Z matching | 43 ## M/Z matching |
49 -m $mzmode -p $mzprec -s $mzshift | 44 -m $mzmode -p $mzprec -s $mzshift -u $mztolunit |
50 | 45 |
51 ## Precursor matching | 46 ## Precursor matching |
52 #if $prec.match == "true" | 47 #if $prec.match == "true" |
53 --precursor-match --pos-prec "$prec.pos" --neg-prec "$prec.neg" | 48 --precursor-match --pos-prec "$prec.pos" --neg-prec "$prec.neg" |
54 #end if | 49 #end if |
70 ## Ouput setting | 65 ## Ouput setting |
71 --molids-sep "$molidssep" | 66 --molids-sep "$molidssep" |
72 ## @@@END_CHEETAH@@@ | 67 ## @@@END_CHEETAH@@@ |
73 ]]></command> | 68 ]]></command> |
74 | 69 |
75 <!--====== | 70 <!-- Inputs {{{1 --> |
76 = INPUTS = | 71 <!-- **************************************************************** --> |
77 =======--> | |
78 | 72 |
79 <inputs> | 73 <inputs> |
80 | 74 |
81 <!-- DATABASE --> | 75 <!-- Database {{{2 --> |
82 | 76 <!-- **************************************************************** --> |
83 <conditional name="db"> | 77 <conditional name="db"> |
84 | 78 |
85 <param name="dbtype" label="Database" type="select" refresh_on_change="true"> | 79 <param name="dbtype" label="Database" type="select" refresh_on_change="true"> |
86 <option value="inhouse">In-house</option> | 80 <option value="inhouse">In-house</option> |
87 <option value="peakforest">Peakforest</option> | 81 <option value="peakforest">Peakforest</option> |
88 </param> | 82 </param> |
89 | 83 |
84 <!-- In-house database parameters {{{3 --> | |
85 <!-- **************************************************************** --> | |
90 <when value="inhouse"> | 86 <when value="inhouse"> |
91 <!-- Database file --> | 87 <!-- Database file --> |
92 <param name="dburl" label="Database file" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. Retention time values must be in seconds."/> | 88 <param name="dburl" label="Database file" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. Retention time values must be in seconds."/> |
93 | 89 |
94 <!-- File database field names --> | 90 <!-- File database field names --> |
95 <param name="dbspectrumidfield" type="select" label="Database file Spectrum ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'spectrumid,accession,compoundid,molid')" help="Select the Spectrum ID column of the database file."/> | 91 <param name="dbfields" label="Column names" type="text" size="256" value="mztheo=mztheo,chromcolrt=chromcolrt,compoundid=compoundid,chromcol=chromcol,msmode=msmode,peakattr=peakattr,pubchemcompid=pubchemcompid,chebiid=chebiid,hmdbid=hmdbid,keggid=keggid" help="The list of column names of your database in-house file, as a coma separated list of key/value pairs."/> |
96 <param name="dbmzreffield" type="select" label="Database file Reference MZ column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'mztheo,mzexp,mz')" help="Select the Reference MZ column of the database file."/> | 92 |
97 <param name="dbchromcolfield" type="select" label="Database file Chromatographic Column Name column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chromcol,col')" help="Select the Chromatographic Column Name column of the database file." refresh_on_change="true"/> | |
98 <param name="dbchromcolrtfield" type="select" label="Database file Chromatographic Column Retention Time column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chromcolrt,colrt,rt')" help="Select the Chromatographic Column Retention Time column of the database file."/> | |
99 <param name="dbmsmodefield" type="select" label="Database file MS Mode column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'msmode,mode')" help="Select the MS Mode column of the database file." refresh_on_change="true"/> | |
100 <param name="dbpeakattrfield" type="select" label="Database file Peak Attribution column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'peakattr,attr')" help="Select the Peak Attribution column of the database file."/> | |
101 <param name="dbpubchemcompidfield" type="select" label="Database file PubChem Compound ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'pubchemcompid,pubchemid,pubchemcomp,pubchem')" help="Select the PubChem Compound ID column of the database file."/> | |
102 <param name="dbchebiidfield" type="select" label="Database file ChEBI ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chebiid,chebi')" help="Select the ChEBI ID column of the database file."/> | |
103 <param name="dbhmdbidfield" type="select" label="Database file HMDB Metabolite ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'hmdbid,hmdb')" help="Select the HMDB Metabolite ID column of the database file."/> | |
104 <param name="dbkeggidfield" type="select" label="Database file KEGG Compound ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'keggid,kegg')" help="Select the KEGG Compound ID column of the database file."/> | |
105 | |
106 <!-- File database MS modes --> | 93 <!-- File database MS modes --> |
107 <param name="dbmsposmode" label="File database MS Positive mode" type="select" dynamic_options="get_ms_mode_value(file = db['dburl'], col = db['dbmsmodefield'], preferred = 'POS,pos,+')" help="Select the value used to identify the positive MS mode."/> | 94 <param name="dbmsmodes" label="MS modes" help="Values used for the file database MS modes, as a coma separated list of key/value pairs." type="text" size="64" value="pos=pos,neg=neg"/> |
108 <param name="dbmsnegmode" label="File database MS Negative mode" type="select" dynamic_options="get_ms_mode_value(file = db['dburl'], col = db['dbmsmodefield'], preferred = 'NEG,neg,-')" help="Select the value used to identify the negitive MS mode."/> | |
109 | 95 |
110 <!-- File database RT unit --> | 96 <!-- File database RT unit --> |
111 <param name="dbrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help=""> | 97 <param name="dbrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help=""> |
112 <option value="sec">Seconds</option> | 98 <option value="sec">Seconds</option> |
113 <option value="min">Minutes</option> | 99 <option value="min">Minutes</option> |
114 </param> | 100 </param> |
115 | 101 |
116 <param name="dbtoken" type="text" size="32" value="" hidden="true"/> | 102 <param name="dbtoken" type="text" size="32" value="" hidden="true"/> |
117 </when> | 103 </when> |
118 | 104 |
105 <!-- PeakForest database parameters {{{3 --> | |
106 <!-- **************************************************************** --> | |
119 <when value="peakforest"> | 107 <when value="peakforest"> |
120 <param name="dburl" type="text" size="128" value="https://peakforest-alpha.inra.fr/rest" refresh_on_change="true"/> | 108 <param name="dburl" type="text" size="128" value="https://metabohub.peakforest.org/rest/" refresh_on_change="true"/> |
121 | 109 |
122 <param name="dbtoken" label="Peakforest security token" type="text" size="32" value="" refresh_on_change="true" help="If you do not have yet a Peakforest token, go to Peakforest website and request one from your account."/> | 110 <param name="dbtoken" label="Peakforest security token" type="text" size="32" value="" refresh_on_change="true" help="If you do not have yet a Peakforest token, go to Peakforest website and request one from your account."/> |
123 | 111 |
124 <param name="dbchromcolfield" type="text" size="32" value="" hidden="true"/> | 112 <param name="dbchromcolfield" type="text" size="32" value="" hidden="true"/> |
125 </when> | 113 </when> |
126 </conditional> | 114 </conditional> |
127 | 115 |
128 <!-- INPUT --> | 116 <!-- Input file {{{2 --> |
129 | 117 <!-- **************************************************************** --> |
130 <!-- Input file --> | 118 |
131 <param name="mzrtinput" label="Input file - MZ(/RT) values" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. RT values must be in seconds."/> | 119 <!-- Input file --> |
132 | 120 <param name="mzrtinput" label="Input file - MZ(/RT) values" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. RT values must be in seconds."/> |
133 <!-- Input field field names --> | 121 |
134 <param name="inputmzfield" type="select" label="Input file MZ column name" dynamic_options="get_file_cols(file = mzrtinput, preferred = 'mzmed,mz')" help="Select the MZ column of the input file."/> | 122 <!-- Input field field names --> |
135 <param name="inputrtfield" type="select" label="Input file RT column name" dynamic_options="get_file_cols(file = mzrtinput, preferred = 'rtmed,rt')" help="Select the RT column of the input file."/> | 123 <param name="inputfields" type="text" label="Input column names" size="64" help="Input file column names, as a coma separated list of key/value pairs." value="mz=mz,rt=rt"/> |
136 | 124 |
137 <!-- Input file RT unit --> | 125 <!-- Input file RT unit --> |
138 <param name="inputrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help=""> | 126 <param name="inputrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help=""> |
139 <option value="sec">Seconds</option> | 127 <option value="sec">Seconds</option> |
140 <option value="min">Minutes</option> | 128 <option value="min">Minutes</option> |
141 </param> | 129 </param> |
142 | 130 |
143 <!-- M/Z MATCHING --> | 131 <!-- M/Z matching {{{2 --> |
144 | 132 <!-- **************************************************************** --> |
145 <!-- Mode --> | 133 |
146 <param name="mzmode" label="MS mode" type="select" display="radio" multiple="false" help=""> | 134 <!-- Mode --> |
147 <option value="pos">Positive</option> | 135 <param name="mzmode" label="MS mode" type="select" display="radio" multiple="false" help=""> |
148 <option value="neg">Negative</option> | 136 <option value="pos">Positive</option> |
149 </param> | 137 <option value="neg">Negative</option> |
150 | 138 </param> |
151 <!-- MZ matching parameters --> | 139 |
152 <param name="mzprec" label="M/Z precision (in ppm)" type="float" help="" value="5"/> | 140 <!-- MZ matching parameters --> |
153 <param name="mzshift" label="M/Z shift (in ppm)" type="float" help="" value="0"/> | 141 <param name="mzprec" label="M/Z precision" type="float" help="" value="5"/> |
154 | 142 <param name="mzshift" label="M/Z shift" type="float" help="" value="0"/> |
155 <!-- RETENTION TIME PARAMETERS --> | 143 <param name="mztolunit" label="M/Z tolerance unit" type="select" display="radio" multiple="false" help=""> |
156 | 144 <option value="ppm">PPM</option> |
157 <!-- List of chromatographic columns --> | 145 <option value="plain">Plain</option> |
158 <param name="chromcols" type="select" label="Chromatographic columns" multiple="true" dynamic_options="get_chrom_cols(dbtype = db['dbtype'], dburl = db['dburl'], dbtoken = db['dbtoken'], col_field = db['dbchromcolfield'])" help="Select here the set of chromatographic columns against which the retention time matching will be run."/> | 146 </param> |
159 | 147 |
160 <!-- Tolerances --> | 148 <!-- RT matching {{{2 --> |
161 <param name="tolx" label="RTX retention time tolerance, parameter x (in seconds)" type="float" help="" value="5"/> | 149 <!-- **************************************************************** --> |
162 <param name="toly" label="RTY retention time tolerance, parameter y" type="float" help="" value="0.8"/> | 150 |
163 <param name="tolz" label="RTZ retention time tolerance, used when precursor matching is enabled." type="float" help="" value="5"/> | 151 <!-- List of chromatographic columns --> |
164 | 152 <param name="chromcols" type="text" label="Chromatographic columns" size="2048" value=""/> |
165 <!-- PRECURSOR MATCH --> | 153 |
154 <!-- Tolerances --> | |
155 <param name="tolx" label="RTX" help="The retention time tolerance X parameter (in seconds)." type="float" value="5"/> | |
156 <param name="toly" label="RTY" help="The retention time tolerance Y parameter (no unit)." type="float" value="0.8"/> | |
157 <param name="tolz" label="RTZ" help="The retention time tolerance used when precursor matching is enabled." type="float" value="5"/> | |
158 | |
159 <!-- Precursor matching {{{2 --> | |
160 <!-- **************************************************************** --> | |
166 <conditional name="prec"> | 161 <conditional name="prec"> |
167 | 162 |
168 <param name="match" label="Precursor match" type="select"> | 163 <param name="match" label="Precursor match" type="select"> |
169 <option value="false">Off</option> | 164 <option value="false">Off</option> |
170 <option value="true">On</option> | 165 <option value="true">On</option> |
196 </sanitizer> | 191 </sanitizer> |
197 </param> | 192 </param> |
198 </when> | 193 </when> |
199 </conditional> | 194 </conditional> |
200 | 195 |
201 <!-- OUTPUT --> | 196 <!-- Output format {{{2 --> |
197 <!-- **************************************************************** --> | |
198 | |
202 <!-- Molecule IDs separator character --> | 199 <!-- Molecule IDs separator character --> |
203 <param name="molidssep" label="Molecule IDs separator character" type="text" size="3" value="|" help=""> | 200 <param name="molidssep" label="Multiple matches separator character" type="text" size="3" value="|" help=""> |
204 <sanitizer> | 201 <sanitizer> |
205 <valid initial="string.printable"> | 202 <valid initial="string.printable"> |
206 <remove value='"'/> | 203 <remove value='"'/> |
207 </valid> | 204 </valid> |
208 <mapping initial="none"> | 205 <mapping initial="none"> |
211 </sanitizer> | 208 </sanitizer> |
212 </param> | 209 </param> |
213 | 210 |
214 </inputs> | 211 </inputs> |
215 | 212 |
216 <!--======= | 213 <!-- Outputs {{{1 --> |
217 = OUTPUTS = | 214 <!-- **************************************************************** --> |
218 ========--> | |
219 | 215 |
220 <outputs> | 216 <outputs> |
221 | 217 |
222 <!-- Output file --> | |
223 <data name="mainoutput" label="lcmsmatch_${mzrtinput.name}" format="tabular"/> | 218 <data name="mainoutput" label="lcmsmatch_${mzrtinput.name}" format="tabular"/> |
224 <data name="peaksoutput" label="lcmsmatch_${mzrtinput.name}_peaks" format="tabular"/> | 219 <data name="peaksoutput" label="lcmsmatch_${mzrtinput.name}_peaks" format="tabular"/> |
225 <data name="htmloutput" label="lcmsmatch_${mzrtinput.name}.html" format="html"/> | 220 <data name="htmloutput" label="lcmsmatch_${mzrtinput.name}.html" format="html"/> |
226 | 221 |
227 </outputs> | 222 </outputs> |
228 | 223 |
229 <!--===== | 224 <!-- Tests {{{1 --> |
230 = TESTS = | 225 <!-- **************************************************************** --> |
231 ======--> | |
232 | 226 |
233 <tests> | 227 <tests> |
234 | 228 |
235 <!-- File database test --> | 229 <!-- Test 1, MZ only {{{2 --> |
230 <!-- **************************************************************** --> | |
236 <test> | 231 <test> |
237 <param name="dbtype" value="inhouse"/> | 232 <param name="dbtype" value="inhouse"/> |
238 <param name="dburl" value="filedb.tsv"/> | 233 <param name="dburl" value="filedb.tsv"/> |
239 <param name="dbfields" value=""/> | |
240 <param name="dbmsmodes" value=""/> | |
241 <param name="mzrtinput" value="mz-input-small.tsv"/> | 234 <param name="mzrtinput" value="mz-input-small.tsv"/> |
242 <param name="inputmzfield" value="mzmed"/> | 235 <param name="inputfields" value="mz=mz"/> |
243 <param name="inputrtfield" value="rtmed"/> | |
244 <param name="mzmode" value="pos"/> | 236 <param name="mzmode" value="pos"/> |
245 <output name="mainoutput" file="filedb-small-mz-match-output.tsv"/> | 237 <output name="mainoutput" file="test_1_main_output.tsv"/> |
246 <output name="peaksoutput" file="filedb-small-mz-match-peaks-output.tsv"/> | 238 <output name="peaksoutput" file="test_1_peaks_output.tsv"/> |
247 <output name="htmloutput" file="filedb-small-mz-match-html-output.html"/> | 239 <output name="htmloutput" file="test_1_peaks_output.html"/> |
248 </test> | 240 </test> |
249 | 241 |
250 <!-- File database test --> | 242 <!-- Test 2, MZ & RT {{{2 --> |
251 <!-- | 243 <!-- **************************************************************** --> |
252 <test> | 244 <test> |
253 <param name="dbtype" value="peakforest"/> | 245 <param name="dbtype" value="inhouse"/> |
254 <param name="dbtoken" value="@PEAKFOREST_TOKEN@"/> | 246 <param name="dburl" value="filedb.tsv"/> |
255 <param name="mzrtinput" value="mz-input-small.tsv"/> | 247 <param name="mzrtinput" value="mzrt-input-small.tsv"/> |
256 <param name="inputfields" value=""/> | 248 <param name="inputfields" value="mz=mz,rt=rt"/> |
257 <param name="mzmode" value="pos"/> | 249 <param name="mzmode" value="pos"/> |
258 <output name="mainoutput"> | 250 <param name="dbrtunit" value="min"/> |
259 <assert_contents> | 251 <param name="chromcols" value="col12"/> |
260 <has_text text="mz"/> | 252 <param name="tolx" value="5"/> |
261 </assert_contents> | 253 <param name="toly" value="0.8"/> |
262 </output> | 254 <output name="mainoutput" file="test_2_main_output.tsv"/> |
255 <output name="peaksoutput" file="test_2_peaks_output.tsv"/> | |
256 <output name="htmloutput" file="test_2_peaks_output.html"/> | |
263 </test> | 257 </test> |
264 --> | 258 |
259 <!-- Test 3, MZ & RT with precursor match {{{2 --> | |
260 <!-- **************************************************************** --> | |
261 <test> | |
262 <param name="dbtype" value="inhouse"/> | |
263 <param name="dburl" value="filedb.tsv"/> | |
264 <param name="mzrtinput" value="mzrt-input-small.tsv"/> | |
265 <param name="inputfields" value="mz=mz,rt=rt"/> | |
266 <param name="mzmode" value="pos"/> | |
267 <param name="dbrtunit" value="min"/> | |
268 <param name="chromcols" value="col12"/> | |
269 <param name="tolx" value="5"/> | |
270 <param name="toly" value="0.8"/> | |
271 <param name="match" value="true"/> | |
272 <param name="neg" value="[(M-H)]-,[M-H]-"/> | |
273 <param name="pos" value="[(M+H)]+,[M+H]+"/> | |
274 <param name="tolz" value="60"/> | |
275 <output name="mainoutput" file="test_3_main_output.tsv"/> | |
276 <output name="peaksoutput" file="test_3_peaks_output.tsv"/> | |
277 <output name="htmloutput" file="test_3_peaks_output.html"/> | |
278 </test> | |
279 | |
265 </tests> | 280 </tests> |
266 | 281 |
267 <!--==== | 282 <!-- Help {{{1 --> |
268 = HELP = | 283 <!-- **************************************************************** --> |
269 =====--> | |
270 | 284 |
271 <help> | 285 <help> |
272 <!-- @@@BEGIN_RST@@@ --> | 286 <!-- @@@BEGIN_RST@@@ --> |
273 | 287 |
274 ============== | 288 ============== |
294 Be careful to always provide UTF-8 encoded files, unless you do not use special characters at all. For instance, greek letters in molecule names give errors if the file is in latin1 (ISO 8859-1) or Windows 1252 (not distinguishable from latin1) encoding. | 308 Be careful to always provide UTF-8 encoded files, unless you do not use special characters at all. For instance, greek letters in molecule names give errors if the file is in latin1 (ISO 8859-1) or Windows 1252 (not distinguishable from latin1) encoding. |
295 | 309 |
296 Single file database | 310 Single file database |
297 ==================== | 311 ==================== |
298 | 312 |
299 The database used is provided as a single file, in tabular format, through the *Database file* field. This file must contain a list of MS peaks, with possibly retention times. | 313 In this case, the database used is provided as a single file by the user, in tabular format, through the *Database file* field. This file must contain a list of MS peaks, with possibly retention times. |
300 Peaks are "duplicated" as much as necessary. For instance if 3 retention times are available on a compound with 10 peaks in positive mode, then there will be 30 lines for this compound in positive mode. | 314 Peaks are "duplicated" as much as necessary. For instance if 3 retention times are available on a compound with 10 peaks in positive mode, then there will be 30 lines for this compound in positive mode. |
301 | 315 |
302 The file must contain a header with the column names. The names are free, but must be provided through the different fields named *Database file ... column name*. | 316 The file must contain a header with the column names. The names are free, but must be provided through the *Column names* field as a comma separated list of key/value pairs. See default value as an example. Of course it is much easier if your database file uses the default column names used in the default value of the *Column names* field. The column names shown in the default value are only the ones used by the algorithm. You can provide any additional columns in your database file; they will be copied to the output. |
303 Then you must provide the values used to identify the MS modes (positive and negative). | 317 |
304 | 318 Then you must provide the values used to identify the MS modes (positive and negative), using field *MS modes*. |
305 A final piece of information about the single file database is the unit of the retention times, either in seconds or in minutes. | 319 |
320 A final piece of information about the single file database is the unit of the retention times, either in seconds or in minutes. Use the field "Retention time unit" to provide this information. | |
306 | 321 |
307 Example of database file (totally fake, no meaning): | 322 Example of database file (totally fake, no meaning): |
308 | 323 |
309 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | 324 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ |
310 | molid | mode | mz | composition | attribution | col | rt | molcomp | molmass | molnames | | 325 | molid | mode | mz | composition | attribution | col | rt | molcomp | molmass | molnames | |
329 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | 344 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ |
330 | A10 | "POS" | 72.080775 | "P9Z4W410 O0" | "[(M+H)-(J15L2M6O2)]+" | "colpp" | 0.89 | "J114L6M62O2" | 146.10553 | "Blablaine" | | 345 | A10 | "POS" | 72.080775 | "P9Z4W410 O0" | "[(M+H)-(J15L2M6O2)]+" | "colpp" | 0.89 | "J114L6M62O2" | 146.10553 | "Blablaine" | |
331 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | 346 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ |
332 | A10 | "POS" | 145.097154 | "P92Z6W413 O2" | "[(M+H)-(H2)]+" | "hcoltt" | 0.8 | "J114L6M62O2" | 146.10553 | "Blablaine" | | 347 | A10 | "POS" | 145.097154 | "P92Z6W413 O2" | "[(M+H)-(H2)]+" | "hcoltt" | 0.8 | "J114L6M62O2" | 146.10553 | "Blablaine" | |
333 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ | 348 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+ |
349 | |
350 The corresponding value of the *Column names* field for this database file would be: | |
351 **mztheo=mz,chromcolrt=rt,compoundid=molid,chromcol=col,msmode=mode,peakattr=attribution**. | |
352 | |
353 And the value of the *MS modes* field would be: **pos=POS,neg=NEG**. | |
334 | 354 |
335 MZ/RT input file | 355 MZ/RT input file |
336 ================ | 356 ================ |
337 | 357 |
338 The input to provide is a file, in a tabular format (or TSV: Tab Separated Values), containing the list of M/Z values, with possibly also RT values. | 358 The input to provide is a dataset in a tabular format (or TSV: Tab Separated Values), containing the list of M/Z values, with possibly also RT values. The dataset is chosen through the field *Input file - MZ(/RT) values*. |
339 | 359 |
340 The column names for the M/Z and RT values must be provided through the fields *Input file MZ column name* and *Input file RT column name*. | 360 The column names for the M/Z and RT values must be provided through the field *Input column names*, as a comma separated list of key/value pairs. |
341 As a consequence, the file must contain a header line. | 361 The file/dataset must contain a header line with the same names specified in the field *Input column names*. |
342 | 362 |
343 The unit of the retention time has to be provided with the field *Retention time unit*. | 363 The unit of the retention time has to be provided with the field *Retention time unit*. |
344 | 364 |
345 Example of file input: | 365 Example of file input: |
346 | 366 |
362 | 382 |
363 ------------ | 383 ------------ |
364 M/Z matching | 384 M/Z matching |
365 ------------ | 385 ------------ |
366 | 386 |
367 In the simplest form of the algorithm only the *m/z* values are matched against the database peaks. This happens if both *Retention time match* and *Precursor match* are off. | 387 In the simplest form of the algorithm only the *M/Z* values are matched against the database peaks. This happens if both *Retention time match* and *Precursor match* are off. |
368 | 388 |
369 The first parameter is the MS mode, specified through the *MS mode* parameter. | 389 The first parameter is the MS mode, specified through the *MS mode* parameter. |
370 | 390 |
371 The parameters *M/Z precision* and *M/Z shift* are used by the algorithm in the following formula in order to match an *m/z* value: | 391 The parameters *M/Z precision* and *M/Z shift* are used by the algorithm in the following formula in order to match an *M/Z* value: |
372 | 392 |
373 mz (1 + (- shift - precision) / 10^6) < mzref < mz (1 + (- shift + precision) / 10^6) | 393 mz - shift - precision < mzref < mz - shift + precision |
374 | 394 |
375 Where *mzref* is the M/Z of reference from the database peak that is tested. If this double inequality is true, then the *m/z* value is matched with this peak. | 395 Where *mzref* is the M/Z of reference from the database peak that is tested. If this double inequality is true, then the *M/Z* value is matched with this peak. |
396 | |
397 The parameters *shift* and *precision* can be input in either PPM values of M/Z or in plain values. Use the field *M/Z tolerance unit* to set the unit. | |
376 | 398 |
377 -------------------- | 399 -------------------- |
378 Retention time match | 400 Retention time match |
379 -------------------- | 401 -------------------- |
380 | 402 |
381 If at least one column is selected inside the *Chromatographic columns* parameter section, then retention time is also matched, in addition to the *m/z* value, according to the following formula: | 403 If at least one column is selected inside the *Chromatographic columns* parameter section, then retention time is also matched, in addition to the *M/Z* value, according to the following formula: |
382 | 404 |
383 rt - x - rt^y < colrt < rt + x + rt^y | 405 rt - x - rt^y < colrt < rt + x + rt^y |
384 | 406 |
385 Where *x* is the value of the parameter *RTX* and *y* the value of the parameter *RTY*. | 407 Where *x* is the value of the parameter *RTX* and *y* the value of the parameter *RTY*. |
386 | 408 |
387 If for a reference compound the database does not contain retention time for at least one of the specified columns, then only the *m/z* value is matched against the peaks of the reference compound. This means that in the results you can find compounds that do not match the provided retention time value. | 409 If for a reference compound the database does not contain retention time for at least one of the specified columns, then only the *M/Z* value is matched against the peaks of the reference compound. This means that in the results you can find compounds that do not match the provided retention time value. |
388 | 410 |
389 The *RTZ* parameter is used in the *Precursor match* algorithm (see below). | 411 The *RTZ* parameter is used in the *Precursor match* algorithm (see below). |
390 | 412 |
391 --------------- | 413 --------------- |
392 Precursor match | 414 Precursor match |
393 --------------- | 415 --------------- |
394 | 416 |
395 If the "Precursor match" option is enabled inside the parameters section, then a more sophisticated version of the algorithm, which is executed in two steps, is used. | 417 If the "Precursor match" option is enabled inside the parameters section, then a more sophisticated version of the algorithm, which is executed in two steps, is used. |
396 | 418 |
397 This algorithm takes two more parameters, one for each MS mode. These are the lists of precursors. Since the matching is run for one MS mode only, only one of the two parameters is used. Inside the single file database, all the peaks whose **attr** column value is equal to one of the precursors listed in *List of negative precursors* or *List of positive precursors*, depending on the mode, are considered as precursor peaks. | 419 This algorithm takes two more parameters, one for each MS mode. These are the lists of precursors. Since the matching is run for one MS mode only, only one of the two parameters is used. Inside the single file database, all the peaks whose **peakattr** column value is equal to one of the precursors listed in *List of negative precursors* or *List of positive precursors*, depending on the mode, are considered as precursor peaks. |
398 | 420 |
399 M/Z matching using precursor matching | 421 M/Z matching using precursor matching |
400 ===================================== | 422 ===================================== |
401 | 423 |
402 1. Using the normal M/Z matching algorithm described above, we first look only for precursor peaks ([(M+H)]+, [(M+Na)]+, [(M+Cl)]-, ...). | 424 1. Using the normal M/Z matching algorithm described above, we first look only for precursor peaks ([(M+H)]+, [(M+Na)]+, [(M+Cl)]-, ...). |
412 | 434 |
413 --------------- | 435 --------------- |
414 Output settings | 436 Output settings |
415 --------------- | 437 --------------- |
416 | 438 |
417 The *Molecule IDs separator character* is used to customize the character used to separate the molecule IDs of the **molid** column inside the *main* output file. | 439 The *Multiple matches separator character* is used to customize the character used to separate the multiple values inside each row in the *main* output dataset. The *main* output contains as many rows as the MZ/RT input dataset, thus when for one MZ/RT value the algorithm finds more than one match, it concatenates the matches using this separator character. |
418 | 440 |
419 Output files | 441 Output files |
420 ============ | 442 ============ |
421 | 443 |
422 Three files are output by the tool. | 444 Three files are output by the tool. |
423 | 445 |
424 +-------------+--------------------------------------+--------------------------------------------------------+ | 446 +-------------+--------------------------------------+--------------------------------------------------------+ |
425 | Outputs | File name | Description | | 447 | Outputs | File name | Description | |
426 +-------------+--------------------------------------+--------------------------------------------------------+ | 448 +-------------+--------------------------------------+--------------------------------------------------------+ |
427 | Main output | lcmsmatching_{input_file_name} | Contains the list of compounds that have been matched. | | 449 | Main output | lcmsmatching_{input_file_name} | Contains the same data as the input dataset, with | |
450 | | | match result included on each row. If more than one | | |
451 | | | match is found for a row, the different values of the | | |
452 | | | match are concatenated using the provided separator | | |
453 | | | character. | | |
428 +-------------+--------------------------------------+--------------------------------------------------------+ | 454 +-------------+--------------------------------------+--------------------------------------------------------+ |
429 | Peak list | lcmsmatching_peaks_{input_file_name} | Contains all matched database peaks. | | 455 | Peak list | lcmsmatching_{input_file_name}_peaks | Contains the same data as the input dataset, with | |
456 | | | match result included on each row. If more than one | | |
457 | | | match is found for a row, then the row is duplicated. | | |
458 | | | Hence there is either no match for a row, or one | | |
459 | | | single match. | | |
430 +-------------+--------------------------------------+--------------------------------------------------------+ | 460 +-------------+--------------------------------------+--------------------------------------------------------+ |
431 | HTML output | lcmsmatching_{input_file_name}.html | Contains the two tables on one page. | | 461 | HTML output | lcmsmatching_{input_file_name}.html | Contains the same table as *Peak list* but in HTML | |
462 | | | format and with links to external databases if columns | | |
463 | | | for PubChem Compound, ChEBI, HMDB Metabolites or KEGG | | |
464 | | | Compounds are provided. | | |
432 +-------------+--------------------------------------+--------------------------------------------------------+ | 465 +-------------+--------------------------------------+--------------------------------------------------------+ |
433 | 466 |
434 The **main** output is identical to the input file, to which is added an *msmatching* column. This column contains a list of IDs of the compounds that have been matched for this couple of (m/z, rt) values. | 467 The match results are output as new columns appended to the columns provided inside the MZ/RT input dataset, and prefixed with "lcmsmatching.". |
435 | |
436 The **peak list** output contains all database peaks that have been matched, for each (m/z, rt) input couple. Thus for each (m/z, rt) couple, there will be zero, one or more matched peaks output. The columns output are *mz*, *rt*, *id*, *mztheo*, *col*, *colrt*, *attribution* and *composition*, where *id* is the compound ID, *mztheo* is the theoretical mass of the fragment, *col* is the matched column and *colrt* is the retention time measured on the column for the reference compound. | |
437 | |
438 The **HTML** output contains the peak table with links toward HMDB, KEGG, ChEBI and PubChem public databases, when IDs are available. | |
439 | 468 |
440 ===== | 469 ===== |
441 About | 470 About |
442 ===== | 471 ===== |
443 | 472 |
453 Data and algorithms have been kindly provided by Christophe Junot at *DSV/IBITEC-S/SPI* (*CEA/Saclay*), from a former application developed by Cyrille Petat and Arnaud Martel at *DSV/IBITEC-S/DIR* (*CEA/Saclay*). | 482 Data and algorithms have been kindly provided by Christophe Junot at *DSV/IBITEC-S/SPI* (*CEA/Saclay*), from a former application developed by Cyrille Petat and Arnaud Martel at *DSV/IBITEC-S/DIR* (*CEA/Saclay*). |
454 | 483 |
455 .. class:: infomark | 484 .. class:: infomark |
456 | 485 |
457 **Please cite** | 486 **Please cite** |
458 R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org | 487 R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org. |
488 | |
489 ============== | |
490 Changelog/News | |
491 ============== | |
492 | |
493 **Version 4.0.0 - 02/01/2019** | |
494 | |
495 - NEW: Use of R biodb library. Connection to databases and matching have been moved to biodb library, which is maintained separately at http://github.com/pkrog/biodb. | |
459 | 496 |
460 <!-- @@@END_RST@@@ --> | 497 <!-- @@@END_RST@@@ --> |
461 </help> | 498 </help> |
462 | 499 |
463 <!--========= | 500 <!-- Citations {{{1 --> |
464 = CITATIONS = | 501 <!-- **************************************************************** --> |
465 ==========--> | 502 |
466 | 503 <citations> |
467 <citations/> | 504 <citation type="bibtex">@unpublished{FGiacomoni2017, |
505 title = {PeakForest [Internet], a spectral data portal for Metabolomics community - storing, curating and annotation services for metabolic profiles of biological matrix.}, | |
506 author = {Franck Giacomoni and Nils Paulhe}, | |
507 institution = {INRA / MetaboHUB}, | |
508 year = {2017}, | |
509 note = {Unpublished paper, available from: https://peakforest.org/.} | |
510 }</citation> | |
511 </citations> | |
468 | 512 |
469 </tool> | 513 </tool> |