Mercurial > repos > galaxyp > proteomics_moff
comparison moff.xml @ 0:b4098353ee73 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/moFF commit bc0fad49e3ba73fa5b5b326e940adf9e11854d94
author | galaxyp |
---|---|
date | Fri, 05 Jan 2018 12:47:36 -0500 |
parents | |
children | 8f0e76ad46ef |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b4098353ee73 |
---|---|
1 <tool id="proteomics_moff" name="moFF" version="@VERSION@"> | |
2 <description>extracts MS1 intensities from spectrum files</description> | |
3 <macros> | |
4 <token name="@VERSION@">1.2</token> | |
5 <!-- xml macros, used for shared Galaxy parameter inputs --> | |
6 <xml name="ident_input_macro" token_allow_multiple="true" token_input_type="data"> | |
7 <!-- identification-file inputs shared by all three modules: callers choose the input kind through input_type ("data" by default, "data_collection" for MBR and all), and allow_multiple is forwarded to the multiple attribute --> | |
8 <conditional name="ident_input"> | |
9 <param name="input_type_selector" type="select" label="Choose the format for the identification file:"> | |
10 <option value="ps">Peptide Shaker PSM report (standard, not extended)</option> | |
11 <option value="generic">Another tabular identification file</option> | |
12 </param> | |
13 <when value="ps"> | |
14 <param name="ident_input_file" type="@INPUT_TYPE@" format="tabular" label="Peptide Shaker PSM report" multiple="@ALLOW_MULTIPLE@"/> | |
15 </when> | |
16 <when value="generic"> | |
17 <param name="ident_input_file" type="@INPUT_TYPE@" format="tabular" label="A general tabular format" multiple="@ALLOW_MULTIPLE@" | |
18 help="Must have specific columns; see below to select these columns from your file. The file should have at most one header line. "/> | |
19 <param name="remove_header" type="boolean" value="false" label="Remove the header line?" help="This is necessary if the file has a line with column headers"/> | |
20 <param name="peptide" | |
21 type="data_column" | |
22 data_ref="ident_input_file" | |
23 label="Column with peptide-spectrum-match sequence"/> | |
24 <param name="prot" | |
25 type="data_column" | |
26 data_ref="ident_input_file" | |
27 label="Column with protein ID"/> | |
28 <param name="mod_peptide" | |
29 type="data_column" | |
30 data_ref="ident_input_file" | |
31 label="Column with peptide-spectrum-match sequence that contains possible modifications"/> | |
32 <param name="rt" | |
33 type="data_column" | |
34 data_ref="ident_input_file" | |
35 label="Column with PSM retention time (in seconds)"/> | |
36 <param name="mz" | |
37 type="data_column" | |
38 data_ref="ident_input_file" | |
39 label="Column with m/z (mass over charge)"/> | |
40 <param name="mass" | |
41 type="data_column" | |
42 data_ref="ident_input_file" | |
43 label="Column with mass of the peptide"/> | |
44 <param name="charge" | |
45 type="data_column" | |
46 data_ref="ident_input_file" | |
47 label="Column with charge of ionized peptide"/> | |
48 </when> | |
49 </conditional> | |
50 </xml> | |
51 <xml name="raw_input_macro" token_input_type="data" token_allow_multiple="true"> <!-- spectrum-file input: the selector only switches the accepted format (raw or mzml); both cases bind the same inputraw argument --> | |
52 <conditional name="msms_input"> | |
53 <param name="input_type_selector" label="Choose the format for the MS/MS file" type="select"> | |
54 <option value="raw">Thermo RAW file</option> | |
55 <option value="mzml">mzML</option> | |
56 </param> | |
57 <when value="raw"> | |
58 <param argument="--inputraw" label="RAW file(s)" format="raw" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@"/> | |
59 </when> | |
60 <when value="mzml"> | |
61 <param argument="--inputraw" label="mzML file(s)" format="mzml" type="@INPUT_TYPE@" multiple="@ALLOW_MULTIPLE@"/> | |
62 </when> | |
63 </conditional> | |
64 </xml> | |
65 <!-- tokens (code snippets used in <command>) --> | |
66 <token name="@IDENT_INPUT_ARG_MULTIPLE@"><![CDATA[ | |
67 ## emits the inputtsv flag followed by one quoted ./ident_inputs/<display name> path per element of the identification collection | |
68 --inputtsv | |
69 #for $key in $task.ident_input.ident_input_file.keys(): | |
70 './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' | |
71 #end for | |
72 ]]></token> | |
73 <token name="@IDENT_INPUT_ARG_SINGLE@"><![CDATA[ | |
74 ## emits the inputtsv flag with the single identification file, addressed by its display name inside ./ident_inputs | |
75 --inputtsv './ident_inputs/${task.ident_input.ident_input_file.display_name}' | |
76 ]]></token> | |
77 <token name="@WRANGLE_IDENT_INPUT_SINGLE@"><![CDATA[ | |
78 mkdir ./ident_inputs && | |
79 #if $task.ident_input.input_type_selector == "ps": | |
80 ln -s '$task.ident_input.ident_input_file' './ident_inputs/$task.ident_input.ident_input_file.display_name' && | |
81 #else | |
82 ## generic tabular input: write the seven selected columns to a new file, named after the | |
83 ## dataset's display name, inside ./ident_inputs. | |
84 ## The history dataset is only read here; it is never edited or replaced in place. | |
85 ## skip_lines is 1 when the first (header) line has to be dropped, 0 otherwise | |
86 #set $skip_lines = 1 if $task.ident_input.remove_header else 0 | |
87 ## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge" | |
88 echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > './ident_inputs/$task.ident_input.ident_input_file.display_name' && | |
89 ## "NR > skip" leaves out the header line when requested; the column numbers are handed to awk as variables | |
90 awk 'BEGIN{OFS="\t"; FS="\t"} NR > skip {print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' skip="$skip_lines" pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '$task.ident_input.ident_input_file' >> './ident_inputs/$task.ident_input.ident_input_file.display_name' && | |
91 #end if | |
92 ]]></token> | |
93 <token name="@WRANGLE_IDENT_INPUT_MULTIPLE@"><![CDATA[ | |
94 mkdir ./ident_inputs && | |
95 #if $task.ident_input.input_type_selector == "ps": | |
96 #for $key in $task.ident_input.ident_input_file.keys(): | |
97 ln -s '${task.ident_input.ident_input_file[$key]}' './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' && | |
98 #end for | |
99 #else | |
100 ## generic tabular input: for every collection element, write the seven selected columns | |
101 ## to a new file, named after the element's display name, inside ./ident_inputs. | |
102 ## The history datasets are only read here; they are never edited or replaced in place. | |
103 ## skip_lines is 1 when the first (header) line has to be dropped, 0 otherwise | |
104 #set $skip_lines = 1 if $task.ident_input.remove_header else 0 | |
105 #for $key in $task.ident_input.ident_input_file.keys(): | |
106 ## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge" | |
107 echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' && | |
108 ## "NR > skip" leaves out the header line when requested; awk reads the current collection element | |
109 awk 'BEGIN{OFS="\t"; FS="\t"} NR > skip {print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' skip="$skip_lines" pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '${task.ident_input.ident_input_file[$key]}' >> './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' && | |
110 #end for | |
111 #end if | |
112 ]]></token> | |
113 <!-- inputraw argument for a single spectrum file, addressed by its display name inside ./raws --> <token name="@RAW_INPUT_ARG_SINGLE@"><![CDATA[ | |
114 --inputraw './raws/${task.msms_input.inputraw.display_name}' | |
115 ]]></token> | |
116 <!-- inputraw argument followed by one quoted ./raws path per element of the spectrum collection --> <token name="@RAW_INPUT_ARG_MULTIPLE@"><![CDATA[ | |
117 --inputraw | |
118 #for $element_id in $task.msms_input.inputraw.keys(): | |
119 './raws/${task.msms_input.inputraw[$element_id].display_name}' | |
120 #end for | |
121 ]]></token> | |
122 <token name="@WRANGLE_RAW_INPUT_SINGLE@"><![CDATA[ | |
123 mkdir ./raws && | |
124 ## link the history dataset into ./raws under its display name | |
125 ln -s '${task.msms_input.inputraw}' './raws/${task.msms_input.inputraw.display_name}' && | |
126 ]]></token> | |
127 <token name="@WRANGLE_RAW_INPUT_MULTIPLE@"><![CDATA[ | |
128 mkdir ./raws && | |
129 ## link every element of the spectrum collection into ./raws under its display name | |
130 #for $element_id in $task.msms_input.inputraw.keys(): | |
131 ln -s '${task.msms_input.inputraw[$element_id]}' './raws/${task.msms_input.inputraw[$element_id].display_name}' && | |
132 #end for | |
133 ]]></token> | |
134 </macros> | |
135 <requirements> | |
136 <requirement type="package" version="@VERSION@">moff</requirement> | |
137 </requirements> | |
138 <!-- one shell pipeline per module: stages inputs under ./ident_inputs and ./raws, runs moff.py, moff_mbr.py or moff_all.py, then either moves results onto the declared outputs or leaves them in ./out for dataset discovery --> <command detect_errors="aggressive"><![CDATA[ | |
139 mkdir ./out && | |
140 #if $task.task_selector == "moff": | |
141 @WRANGLE_IDENT_INPUT_SINGLE@ | |
142 @WRANGLE_RAW_INPUT_SINGLE@ | |
143 moff.py | |
144 @IDENT_INPUT_ARG_SINGLE@ | |
145 @RAW_INPUT_ARG_SINGLE@ | |
146 --tol $task.tol | |
147 --rt_w $task.rt_w | |
148 --rt_p $task.rt_p | |
149 --output_folder ./out | |
150 #if $task.peptide_summary: | |
151 --peptide_summary 1 | |
152 #end if | |
153 && | |
154 #if $task.peptide_summary: | |
155 mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' && | |
156 #end if | |
157 mv ./out/*moff_result.txt '$output_table' | |
158 && | |
159 mv ./out/*.log '$output_logs' | |
160 #else if $task.task_selector == "mbr": | |
161 @WRANGLE_IDENT_INPUT_MULTIPLE@ | |
162 moff_mbr.py | |
163 --inputF ./ident_inputs | |
164 --ext '$task.ext' | |
165 && | |
166 mv ./ident_inputs/mbr_output/* ./out | |
167 #else: | |
168 ## moff_all (mbr followed by apex); results stay in ./out and are picked up by dataset discovery | |
169 @WRANGLE_IDENT_INPUT_MULTIPLE@ | |
170 @WRANGLE_RAW_INPUT_MULTIPLE@ | |
171 moff_all.py | |
172 @IDENT_INPUT_ARG_MULTIPLE@ | |
173 @RAW_INPUT_ARG_MULTIPLE@ | |
174 --tol $task.tol | |
175 --rt_w $task.rt_w | |
176 --rt_p $task.rt_p | |
177 --rt_p_match $task.rt_p_match | |
178 --output_folder ./out | |
179 --ext txt | |
180 #if $task.peptide_summary: | |
181 --peptide_summary 1 | |
182 #end if | |
183 && | |
184 #if $task.peptide_summary: | |
185 mv ./out/peptide_summary_intensity_moFF_run.tab '$output_peptide_summary' && | |
186 #end if | |
187 echo -ne | |
188 #end if | |
189 ]]></command> | |
190 <inputs> <!-- a single conditional, "task", selects the module; each case declares only the inputs that module reads --> | |
191 <conditional name="task"> | |
192 <param name="task_selector" type="select" label="Choose which module to run"> | |
193 <option value="moff" selected="true">Apex intensity</option> | |
194 <option value="mbr">Match between runs</option> | |
195 <option value="all">All (match-between-runs followed by quantitation)</option> | |
196 </param> | |
197 <when value = "moff"> <!-- single identification file plus single spectrum file (macro default input_type="data") --> | |
198 <expand macro="ident_input_macro" allow_multiple="false"/> | |
199 <expand macro="raw_input_macro" allow_multiple="false"/> | |
200 <param argument="--tol" type="float" value="10" label="Tolerance parameter" | |
201 help="Specify the tolerance parameter in ppm." /> | |
202 <param argument="--rt_w" type="float" value="3.0" label="Retention time window" | |
203 help="Specify rt window for xic in minutes." /> | |
204 <param argument="--rt_p" type="float" value="1" label="Time window for the peak" | |
205 help="Specify the time windows for the peak in minutes." /> | |
206 <param argument="--rt_p_match" type="float" value="1.5" label="Time window for the matched peak" | |
207 help="Specify the time windows for the matched peak in minutes." /> <!-- NOTE(review): the moff branch of the command never passes rt_p_match to moff.py; confirm whether this input is needed here --> | |
208 <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/> | |
209 </when> | |
210 <when value="mbr"> <!-- identification files only, supplied as a dataset collection; this case declares no peptide_summary input --> | |
211 <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/> | |
212 <param argument="--ext" type="text" value="tab" label="Provide the extension used in the display file name (without the period)"/> | |
213 </when> | |
214 <when value="all"> <!-- identification and spectrum files, both supplied as dataset collections --> | |
215 <expand macro="ident_input_macro" allow_multiple="false" input_type="data_collection"/> | |
216 <expand macro="raw_input_macro" allow_multiple="false" input_type="data_collection"/> | |
217 <param argument="--tol" type="float" value="10" label="Tolerance parameter" | |
218 help="Specify the tolerance parameter in ppm." /> | |
219 <param argument="--rt_w" type="float" value="3.0" label="Retention time window" | |
220 help="Specify rt window for xic in minutes." /> | |
221 <param argument="--rt_p" type="float" value="1" label="Time window for the peak" | |
222 help="Specify the time windows for the peak in minutes." /> | |
223 <param argument="--rt_p_match" type="float" value="1.2" label="Time window for the matched peak" | |
224 help="Specify the time windows for the matched peak in minutes." /> | |
225 <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/> | |
226 </when> | |
227 </conditional> | |
228 </inputs> | |
229 <outputs> <!-- single datasets for the moff module, discovered collections for mbr and all, plus an optional peptide summary --> | |
230 <data format="tabular" name="output_table" label="${tool.name} quantification: ${on_string}"> | |
231 <filter>task['task_selector']=='moff'</filter> | |
232 </data> | |
233 <data format="txt" name="output_logs" label="${tool.name} log: ${on_string}"> | |
234 <filter>task['task_selector']=='moff'</filter> | |
235 </data> | |
236 <collection name="ident_output" type="list" label="${tool.name} quantification: ${on_string}"> | |
237 <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter> | |
238 <!-- every .txt file found in ./out becomes one element, named after the part of the file name before the extension --> | |
239 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.txt" directory="out" format="tabular"/> | |
240 </collection> | |
241 <collection name="log_output" type="list" label="${tool.name} logs: ${on_string}"> | |
242 <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter> | |
243 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.log" directory="out" format="txt"/> | |
244 </collection> | |
245 <data format="tabular" name="output_peptide_summary" label="${tool.name} peptide summary: ${on_string}"> <!-- the mbr case declares no peptide_summary input, so the module is checked first and the key is only looked up where it exists --> | |
246 <filter>task['task_selector'] != 'mbr' and task['peptide_summary']</filter> | |
247 </data> | |
248 </outputs> | |
249 <tests> | |
250 <!-- test moff_all --> | |
251 <test> <!-- "all" module: two identification tables and two mzML files as list collections; checks the peptide summary, both result tables and one line of each log --> | |
252 <param name="task_selector" value="all"/> | |
253 <param name="input_type_selector" value="ps"/> <!-- NOTE(review): both ident_input and msms_input define an input_type_selector; "ps" is only a valid option of ident_input, and msms_input is not set although mzML files are supplied below. Confirm which conditional receives this value --> | |
254 <param name="ident_input_file"> | |
255 <collection type="list"> | |
256 <element name="mbr_test1" value="input/mbr_test1.tabular"/> | |
257 <element name="mbr_test2" value="input/mbr_test2.tabular"/> | |
258 </collection> | |
259 </param> | |
260 <param name="inputraw"> | |
261 <collection type="list"> | |
262 <element name="mbr_test1" value="input/mbr_test1.mzml"/> | |
263 <element name="mbr_test2" value="input/mbr_test2.mzml"/> | |
264 </collection> | |
265 </param> | |
266 <param name="peptide_summary" value="true"/> | |
267 <output name="output_peptide_summary" ftype="tabular"> | |
268 <assert_contents> | |
269 <has_text text="sumIntensity_mbr_test1"/> | |
270 <has_text text="sumIntensity_mbr_test2"/> | |
271 </assert_contents> | |
272 </output> | |
273 <output_collection name="ident_output" type="list"> | |
274 <element name="mbr_test1_match_moff_result" value="output1/mbr_test1_match_moff_result.txt"/> | |
275 <element name="mbr_test2_match_moff_result" value="output1/mbr_test2_match_moff_result.txt"/> | |
276 </output_collection> | |
277 <output_collection name="log_output" type="list"> | |
278 <element name="mbr_test1_match__moff"> | |
279 <assert_contents> | |
280 <has_line line="peptide at line 200 --> MZ: 783.4200 RT: 134.6997 matched (yes=1/no=0): 0"/> | |
281 </assert_contents> | |
282 </element> | |
283 <element name="mbr_test2_match__moff"> | |
284 <assert_contents> | |
285 <has_line line="peptide at line 132 --> MZ: 767.8700 RT: 98.1975 matched (yes=1/no=0): 0"/> | |
286 </assert_contents> | |
287 </element> | |
288 </output_collection> | |
289 </test> | |
290 <!-- test moff alone --> | |
291 <test> <!-- moff module on one Peptide Shaker report and one mzML file; both format selectors are addressed by their full conditional path because they share the name input_type_selector --> | |
292 <param name="task_selector" value="moff"/> | |
293 <param name="task|ident_input|input_type_selector" value="ps"/> | |
294 <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/> | |
295 <param name="task|msms_input|input_type_selector" value="mzml"/> | |
296 <param name="inputraw" value="input/test.mzml" ftype="mzml"/> | |
297 <param name="peptide_summary" value="true"/> | |
298 <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/> | |
299 <output name="output_logs"> | |
300 <assert_contents> | |
301 <has_line line="peptide at line 294 --> MZ: 677.3300 RT: 60.6078"/> | |
302 </assert_contents> | |
303 </output> | |
304 </test> | |
305 <!-- test the generic input --> | |
306 <test> <!-- moff module with the generic tabular input: the header line is dropped and seven columns are picked by number; both format selectors are addressed by their full conditional path because they share the name input_type_selector --> | |
307 <param name="task_selector" value="moff"/> | |
308 <param name="task|ident_input|input_type_selector" value="generic"/> | |
309 <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/> | |
310 <param name="remove_header" value="true"/> | |
311 <param name="task|msms_input|input_type_selector" value="mzml"/> | |
312 <param name="inputraw" value="input/test.mzml" ftype="mzml"/> | |
313 <param name="peptide" value="3"/> | |
314 <param name="prot" value="2"/> | |
315 <param name="mod_peptide" value="7"/> | |
316 <param name="rt" value="13"/> | |
317 <param name="mz" value="14"/> | |
318 <param name="mass" value="17"/> | |
319 <param name="charge" value="15"/> | |
320 <param name="peptide_summary" value="true"/> | |
321 <output name="output_peptide_summary" ftype="tabular" file="output2/moff_test_pepsum.tab"/> | |
322 <output name="output_logs"> | |
323 <assert_contents> | |
324 <has_line line="peptide at line 294 --> MZ: 677.3300 RT: 60.6078"/> | |
325 </assert_contents> | |
326 </output> | |
327 </test> | |
328 <!-- test mbr --> | |
329 <test> <!-- mbr module: two Peptide Shaker reports supplied as a list collection; expects exactly two matched tables in ident_output --> | |
330 <param name="task_selector" value="mbr"/> | |
331 <param name="input_type_selector" value="ps"/> | |
332 <param name="ident_input_file"> | |
333 <collection type="list"> | |
334 <element name="mbr_test1" value="input/mbr_test1.tabular"/> | |
335 <element name="mbr_test2" value="input/mbr_test2.tabular"/> | |
336 </collection> | |
337 </param> | |
338 <param name="ext" value="tabular"/> | |
339 <output_collection name="ident_output" type="list" count="2"> | |
340 <element name="mbr_test1_match" file="input/mbr_output/mbr_test1_match.txt"/> | |
341 <element name="mbr_test2_match" file="input/mbr_output/mbr_test2_match.txt"/> | |
342 </output_collection> | |
343 </test> | |
344 </tests> | |
345 <help> | |
346 <![CDATA[ | |
347 **Description** | |
348 | |
349 moFF (a Modest Feature Finder) is an OS independent tool designed to extract | |
350 apex MS1 intensity using a set of identified MS2 peptides. | |
351 It currently uses a Go library to directly extract data from Thermo Raw spectrum files, | |
352 eliminating the need for conversions from other formats. | |
353 Moreover, moFF also allows one to work directly with mzML files. | |
354 | |
355 **Usage** | |
356 | |
357 *Modules:* | |
358 | |
359 1. Apex Intensity: this is used for a single pair of files, one identification and one spectrum file. | |
360 2. Match between runs (MBR): for multiple identification files, share MS2 identified peptides between runs and predict the retention time. | |
361 3. All (match between runs followed by apex intensity): this is used for more than one pair of identification and spectrum files. | |
362 | |
363 If both match between runs and apex intensity are desired, it is best to run them both at once (i.e., run the 'All' module). | |
364 The MBR module is mainly useful for observing the intermediate steps of the algorithm - its outputs cannot be used as inputs to moFF or to other tools. | |
365 | |
366 | |
367 *Inputs:* | |
368 | |
369 - Identification file: this can either be a generic tabular file or the standard PSM report from PeptideShaker. | |
370 If it is a generic tabular file, please select the columns corresponding to the required information. | |
371 | |
372 - MS/MS file: this can either be a Thermo raw file or an mzML file. | |
373 | |
374 A given pair of files must have the *exact* same display name, not including the extension; | |
375 e.g. ``example1.tabular`` and ``example1.mzml``. | |
376 If the display names are different, simply change them in the history menu. | |
377 | |
378 For multiple files (the MBR or All modules), the identification and spectrum files must be provided as dataset collections. | |
379 This allows for usage of the output dataset collections in workflows. | |
380 | |
381 *Parameters:* | |
382 | |
383 All the parameters related to the time windows (``rt_w``, ``rt_p``, ``rt_p_match``) are half-widths: | |
384 each one is half of the full time window in which the apex peak is searched or the XIC is retrieved. | |
385 For correct rt windows, we suggest you set the ``rt_p`` value equal to or slightly greater than the | |
386 dynamic exclusion duration set in your machine. We also suggest always setting | |
387 ``rt_p_match`` slightly larger than the value used for ``rt_p``. | |
388 | |
389 *Outputs:* | |
390 | |
391 When used in the single file mode ("Apex intensity" module), the outputs are 2 (or 3) files: a log file, a quantitation file, | |
392 and (optionally) a peptide summary, with intensities aggregated across peptides. When used in the multiple file mode ("All"), | |
393 the outputs are a dataset collection of log files (one per identification file), a dataset collection of quantification files, and (optionally) a peptide summary. | |
394 | |
395 If used with a generic tabular format, the only columns in the output file are the 7 columns selected while using moFF plus the columns that moFF adds. Other columns are discarded. | |
396 | |
397 **More Information** | |
398 | |
399 See the moFF Github site at https://github.com/compomics/moFF, | |
400 and the publication at https://dx.doi.org/10.1038/nmeth.4075 | |
401 | |
402 ]]> | |
403 </help> | |
404 <citations> | |
405 <citation type="doi">10.1038/nmeth.4075</citation> | |
406 </citations> | |
407 </tool> |