Mercurial > repos > wolma > mimodd_main
comparison convert.xml @ 0:f0f2795de2c7 draft
planemo upload for repository https://github.com/wm75/mimodd_galaxy_wrappers commit 528bcf3b769c7c73f119b2a176d19071f9ef5312
author | wolma |
---|---|
date | Tue, 19 Dec 2017 04:54:04 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f0f2795de2c7 |
---|---|
1 <tool id="mimodd_convert" name="MiModD Convert" version="@MIMODD_WRAPPER_VERSION@"> | |
2 <description>converts sequence data into different formats</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
<!-- Output-format selector shared by all conversion modes.
     SAM and BAM are always offered; a caller can append further
     option elements through the yield slot. -->
<macro name="to_format">
    <param label="to" type="select" name="oformat">
        <option value="sam">sam</option>
        <option value="bam">bam</option>
        <yield />
    </param>
</macro>
<!-- Single-end fastq input selector.
     The user supplies either a repeat of individual datasets (format taken
     from the @FORMAT@ token, default "fastq") or a flat list collection,
     which accepts fastq as well as fastq.gz members.
     A SAM header dataset is requested alongside the reads. -->
<macro name="se_selector" token_format="fastq">
    <conditional name="input">
        <param name="repr" label="Single-end input data provided as" type="select">
            <option value="individual">Individual datasets</option>
            <option value="collection">Collection of datasets</option>
        </param>
        <when value="individual">
            <repeat name="input_data" title="fastq input datasets" min="1" default="1">
                <param name="file1" type="data" format="@FORMAT@" label="single-end read data"/>
            </repeat>
        </when>
        <when value="collection">
            <param name="input_data"
                   type="data_collection"
                   collection_type="list"
                   format="fastq, fastq.gz"
                   label="collection of single-end read input datasets" />
        </when>
    </conditional>
    <param name="header"
           type="data"
           format="sam"
           label="Use Header File"
           help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
</macro>
<!-- Paired-end fastq input selector.
     Three representations are offered: a repeat of individual forward/reverse
     dataset pairs (format taken from the @FORMAT@ token, default "fastq"),
     a single paired collection, or a list of paired collections. Both
     collection variants accept fastq as well as fastq.gz members.
     A SAM header dataset is requested alongside the reads. -->
<macro name="pe_selector" token_format="fastq">
    <conditional name="input">
        <param name="repr" label="Paired-end input data provided as" type="select">
            <option value="individual">Individual datasets</option>
            <option value="collection">Paired collection</option>
            <option value="list_of_pairs">List of pairs</option>
        </param>
        <when value="individual">
            <repeat name="input_data" title="fastq input datasets" min="1" default="1">
                <param name="file1" type="data" format="@FORMAT@" label="first set of reads of paired-end data"/>
                <param name="file2" type="data" format="@FORMAT@" label="second set of reads of paired-end data"/>
            </repeat>
        </when>
        <when value="collection">
            <param name="input_data"
                   type="data_collection"
                   collection_type="paired"
                   format="fastq, fastq.gz"
                   label="paired input dataset collection" />
        </when>
        <when value="list_of_pairs">
            <param name="input_data"
                   type="data_collection"
                   collection_type="list:paired"
                   format="fastq, fastq.gz"
                   label="nested collection of paired input datasets" />
        </when>
    </conditional>
    <param name="header"
           type="data"
           format="sam"
           label="Use Header File"
           help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>
</macro>
<!-- SAM/BAM input selector: one dataset whose format comes from the
     @FORMAT@ token (default "sam").
     The hidden "header" parameter carries the literal string "None", which
     the command template compares against to decide whether a header file
     is passed on. -->
<macro name="sam_bam_selector" token_format="sam">
    <param name="input_data" label="input dataset" type="data" format="@FORMAT@"/>
    <param name="header" value="None" type="hidden"/>
</macro>
<!-- Extra options shown when converting SAM/BAM to (gzipped) fastq.
     split_on_rgs is a hidden, empty placeholder here so the parameter exists
     in every branch of the form; the two visible parameters are read by the
     output filters to decide which fastq collections are produced. -->
<macro name="fastq_output_choices">
    <param name="split_on_rgs" value="" type="hidden"/>
    <param name="reads_to_report"
           type="select"
           display="radio"
           label="Types of reads to generate output for"
           help="By default, the tool will generate two dataset collections, one for single reads found in the input file, and one for paired reads. If you know, in advance, that the input contains only single or only paired reads, you can prevent the generation of an empty dataset collection by selecting the appropriate option here.">
        <option value="default">Single and paired reads</option>
        <option value="single">Single reads only</option>
        <option value="paired">Paired reads only</option>
    </param>
    <param name="multisegment_report"
           type="boolean"
           checked="false"
           label="Generate additional output for multi-segment reads"
           help="Multi-segment reads, as opposed to single- and two-segment/paired reads, are not present in typical NGS data, but allowed in SAM/BAM files. If you expect multi-segment reads in the input file, enable this option." />
</macro>
<!-- Extra option shown when converting SAM/BAM to SAM/BAM: an opt-in switch
     whose true value is the command-line flag inserted verbatim into the
     mimodd call, and whose false value is the empty string. -->
<macro name="bam_output_choices">
    <param name="split_on_rgs"
           type="boolean"
           checked="false"
           truevalue="--split-on-rgs"
           falsevalue=""
           label="Split output based on read group IDs"
           help="If the input file contains reads from different read groups, write them to separate output files; implied automatically for conversions to fastq and gzipped fastq format"/>
</macro>
91 </macros> | |
92 <expand macro="requirements" /> | |
93 <expand macro="stdio" /> | |
94 <expand macro="version_command" /> | |
<!-- Cheetah template that assembles the "mimodd convert" call and, for fastq
     output, renames the paired-read files so they can be discovered as
     forward/reverse elements of the output collection. -->
<command><![CDATA[
    ## Currently Galaxy does not autoconvert collections of fastq.gz files.
    ## This tool wrapper works around that by allowing fastq and fastq.gz as
    ## input collection formats.
    ## gz_input records whether a fastq.gz dataset was seen in a collection.
    ## It is only ever switched on, never back off, so ANY gzipped member
    ## causes the whole input to be declared as gzipped.
    #set gz_input = False

    ## Read-group splitting and fastq/gz conversions write their results into
    ## a scratch directory from which the output collections are discovered.
    #if $str($mode.output.split_on_rgs) or $str($mode.output.oformat) == "fastq" or $str($mode.output.oformat) == "gz":
        mkdir converted_data &&
    #end if

    mimodd convert
    #if $str($mode.iformat) in ("sam", "bam"):
        '${mode.output.input_data}'
    #else if $str($mode.iformat) == "fastq_pe":
        #if $str($mode.output.input.repr) == "collection":
            '$mode.output.input.input_data.forward' '$mode.output.input.input_data.reverse'
            ## A paired collection - if the forward dataset is gzipped we assume
            ## the reverse dataset is too.
            #if $mode.output.input.input_data.forward.is_of_type('fastq.gz'):
                #set gz_input = True
            #end if
        #else
            #for $i in $mode.output.input.input_data
                #if $str($mode.output.input.repr) == "individual":
                    '${i.file1}' '${i.file2}'
                #else
                    '$i.forward' '$i.reverse'
                    ## A list:paired collection - a gzipped forward dataset in
                    ## any of the pairs marks the whole input as gzipped.
                    #if $i.forward.is_of_type('fastq.gz'):
                        #set gz_input = True
                    #end if
                #end if
            #end for
        #end if
    #else
        #for $i in $mode.output.input.input_data
            #if $str($mode.output.input.repr) == "collection":
                '$i'
                ## A simple collection of files - a gzipped dataset anywhere
                ## in the list marks the whole input as gzipped.
                #if $i.is_of_type('fastq.gz'):
                    #set gz_input = True
                #end if
            #else
                '${i.file1}'
            #end if
        #end for
    #end if
    ## SAM/BAM input modes carry a hidden header parameter with the literal
    ## value "None", which suppresses the --header option here.
    #if $str($mode.output.header) != "None":
        --header '$(mode.output.header)'
    #end if
    ## Without the single-file output (filtered out), write into the scratch
    ## directory using "read_group" as the file name prefix.
    #if $str($outputname) == "None":
        --ofile converted_data/read_group
    #else
        --ofile '$outputname'
    #end if
    #if $gz_input:
        ## a gzipped input dataset was found so lets set --iformat accordingly
        #if $str($mode.iformat) == "fastq_pe":
            --iformat gz_pe
        #else
            --iformat gz
        #end if
    #else
        --iformat $(mode.iformat)
    #end if
    --oformat $(mode.output.oformat)
    ${mode.output.split_on_rgs}

    #if $str($mode.output.oformat) == "fastq" or $str($mode.output.oformat) == "gz":
        ## Rename *_r1/*_r2 paired-read files to *_forward/*_reverse.
        ## The [ -f ] guard skips a glob that matched nothing. The suffix is
        ## stripped with shell parameter expansion (\$ keeps Cheetah from
        ## interpreting it), which is safe for names containing whitespace
        ## and avoids spawning basename for every file.
        &&
        cd converted_data &&
        for f in *2segments_r1.fastq.gz; do [ -f "\$f" ] || continue; mv "\$f" "\${f%_r1.fastq.gz}_forward.fastq.gz"; done &&
        for f in *2segments_r2.fastq.gz; do [ -f "\$f" ] || continue; mv "\$f" "\${f%_r2.fastq.gz}_reverse.fastq.gz"; done &&
        for f in *2segments_r1.fastq; do [ -f "\$f" ] || continue; mv "\$f" "\${f%_r1.fastq}_forward.fastq"; done &&
        for f in *2segments_r2.fastq; do [ -f "\$f" ] || continue; mv "\$f" "\${f%_r2.fastq}_reverse.fastq"; done
    #end if

]]></command>
175 | |
<!-- Two-level form: the outer "mode" conditional picks the input format, the
     inner "output" conditional picks the target format. Every inner branch
     expands the matching input selector and provides a split_on_rgs
     parameter (hidden or visible), so the command template can read
     mode.output.split_on_rgs regardless of the selected branch. -->
<inputs>
    <conditional name="mode">
        <param name="iformat" type="select" label="Convert from"
               help="Your choice will update the interface to display further choices appropriate for your type of input data.">
            <option value="fastq">fastq: single-end (one file)</option>
            <option value="fastq_pe">fastq: paired-end (two files)</option>
            <option value="sam">sam</option>
            <option value="bam">bam</option>
        </param>

        <!-- single-end fastq: SAM or BAM output only -->
        <when value="fastq">
            <conditional name="output">
                <expand macro="to_format" />
                <when value="sam">
                    <expand macro="se_selector" format="fastq" />
                    <param name="split_on_rgs" type="hidden" value=""/>
                </when>
                <when value="bam">
                    <expand macro="se_selector" format="fastq" />
                    <param name="split_on_rgs" type="hidden" value=""/>
                </when>
            </conditional>
        </when>

        <!-- paired-end fastq: SAM or BAM output only -->
        <when value="fastq_pe">
            <conditional name="output">
                <expand macro="to_format" />
                <when value="sam">
                    <expand macro="pe_selector" format="fastq" />
                    <param name="split_on_rgs" type="hidden" value=""/>
                </when>
                <when value="bam">
                    <expand macro="pe_selector" format="fastq" />
                    <param name="split_on_rgs" type="hidden" value=""/>
                </when>
            </conditional>
        </when>

        <!-- SAM input: SAM, BAM, fastq or gzipped fastq output -->
        <when value="sam">
            <conditional name="output">
                <expand macro="to_format">
                    <option value="fastq">fastq</option>
                    <option value="gz">gzipped fastq</option>
                </expand>
                <when value="sam">
                    <expand macro="sam_bam_selector" format="sam" />
                    <expand macro="bam_output_choices" />
                </when>
                <when value="bam">
                    <expand macro="sam_bam_selector" format="sam" />
                    <expand macro="bam_output_choices" />
                </when>
                <when value="fastq">
                    <expand macro="sam_bam_selector" format="sam" />
                    <expand macro="fastq_output_choices" />
                </when>
                <when value="gz">
                    <expand macro="sam_bam_selector" format="sam" />
                    <expand macro="fastq_output_choices" />
                </when>
            </conditional>
        </when>

        <!-- BAM input: SAM, BAM, fastq or gzipped fastq output -->
        <when value="bam">
            <conditional name="output">
                <expand macro="to_format">
                    <option value="fastq">fastq</option>
                    <option value="gz">gzipped fastq</option>
                </expand>
                <when value="sam">
                    <expand macro="sam_bam_selector" format="bam" />
                    <expand macro="bam_output_choices" />
                </when>
                <when value="bam">
                    <expand macro="sam_bam_selector" format="bam" />
                    <expand macro="bam_output_choices" />
                </when>
                <when value="fastq">
                    <expand macro="sam_bam_selector" format="bam" />
                    <expand macro="fastq_output_choices" />
                </when>
                <when value="gz">
                    <expand macro="sam_bam_selector" format="bam" />
                    <expand macro="fastq_output_choices" />
                </when>
            </conditional>
        </when>
    </conditional>
</inputs>
261 | |
<!-- Output declarations. Exactly which of these are created is decided by
     the filter expressions, which look at the chosen output format and the
     read-group / read-type options of the form.
     The regular expressions in the pattern attributes use named groups; the
     angle brackets are written as character entities because a literal
     less-than sign is not allowed inside an XML attribute value. -->
<outputs>
    <!-- Single result file: SAM/BAM output without read-group splitting.
         Declared as bam and switched to sam when sam output was requested. -->
    <data name="outputname" format="bam"
          label="Reads converted to ${mode.output.oformat} by ${tool.name} on ${on_string}">
        <change_format>
            <when input="mode.output.oformat" value="sam" format="sam" />
        </change_format>
        <filter>
            (mode['output']['oformat'] in ("bam", "sam") and not mode['output']['split_on_rgs'])
        </filter>
    </data>

    <!-- SAM/BAM output split by read group: every file found in the scratch
         directory becomes one list element. -->
    <collection name="bam_split_on_read_groups" type="list"
                label="Reads converted to ${mode.output.oformat} by ${tool.name} on ${on_string}">
        <discover_datasets pattern="__designation_and_ext__"
                           directory="converted_data" />
        <filter>
            (mode['output']['oformat'] in ('bam', 'sam') and mode['output']['split_on_rgs'])
        </filter>
    </collection>

    <!-- fastq output, single reads: files named *_1segments_r1.fastq[.gz] -->
    <collection name="fastq_SE_output_split_on_read_groups" type="list"
                label="Single reads converted to fastq by ${tool.name} on ${on_string}">
        <discover_datasets
            pattern="(?P&lt;designation&gt;.+)_1segments_r1\.(?P&lt;ext&gt;fastq(\.gz)?)"
            directory="converted_data" />
        <filter>
            (mode['output']['oformat'] in ('fastq', 'gz') and mode['output']['reads_to_report'] in ('default', 'single'))
        </filter>
    </collection>

    <!-- fastq output, read pairs: files named
         read_group_*_2segments_forward|reverse.fastq[.gz], i.e. the names
         produced by the rename step at the end of the command. -->
    <collection name="fastq_PE_reads_split_on_read_groups" type="list:paired"
                label="Read pairs converted to fastq by ${tool.name} on ${on_string}">
        <discover_datasets
            pattern="(?P&lt;identifier_0&gt;read_group_.+)_2segments_(?P&lt;identifier_1&gt;(forward|reverse))\.(?P&lt;ext&gt;fastq(\.gz)?)"
            directory="converted_data" />
        <filter>
            (mode['output']['oformat'] in ('fastq', 'gz') and mode['output']['reads_to_report'] in ('default', 'paired'))
        </filter>
    </collection>

    <!-- fastq output, reads with three or more segments: grouped first by
         read group, then by segment count. -->
    <collection name="fastq_multi-segment_reads_split_on_read_groups" type="list:list"
                label="Multi-segment reads converted to fastq by ${tool.name} on ${on_string}">
        <discover_datasets
            pattern="(?P&lt;identifier_0&gt;read_group_.+)_(?P&lt;identifier_1&gt;[3-9]|[1-9][0-9]+)segments_r[0-9]+\.(?P&lt;ext&gt;fastq(\.gz)?)"
            directory="converted_data" />
        <filter>
            (mode['output']['oformat'] in ('fastq', 'gz') and mode['output']['multisegment_report'])
        </filter>
    </collection>
</outputs>
308 | |
<tests>
    <!-- single-end fastq (individual dataset) plus header file to BAM -->
    <test expect_num_outputs="1">
        <conditional name="mode">
            <param name="iformat" value="fastq" />
            <conditional name="output">
                <param name="oformat" value="bam" />
                <conditional name="input">
                    <param name="repr" value="individual" />
                    <repeat name="input_data">
                        <param name="file1" value="split_pair_reads_1.fastqsanger" />
                    </repeat>
                </conditional>
                <param name="header" value="header_only.sam" />
            </conditional>
        </conditional>
        <output name="outputname" ftype="bam" file="reads_1_w_header.bam" />
    </test>

    <!-- paired-end fastq (individual datasets) plus header file to BAM -->
    <test expect_num_outputs="1">
        <conditional name="mode">
            <param name="iformat" value="fastq_pe" />
            <conditional name="output">
                <param name="oformat" value="bam" />
                <conditional name="input">
                    <param name="repr" value="individual" />
                    <repeat name="input_data">
                        <param name="file1" value="split_pair_reads_1.fastqsanger" />
                        <param name="file2" value="split_pair_reads_2.fastqsanger" />
                    </repeat>
                </conditional>
                <param name="header" value="header_only.sam" />
            </conditional>
        </conditional>
        <output name="outputname" ftype="bam" file="reads_1and2_w_header.bam" />
    </test>

    <!-- paired-end fastq (paired collection) plus header file to BAM;
         expects the same result file as the individual-datasets test -->
    <test expect_num_outputs="1">
        <conditional name="mode">
            <param name="iformat" value="fastq_pe" />
            <conditional name="output">
                <param name="oformat" value="bam" />
                <conditional name="input">
                    <param name="repr" value="collection" />
                    <param name="input_data">
                        <collection type="paired">
                            <element name="forward" value="split_pair_reads_1.fastqsanger" />
                            <element name="reverse" value="split_pair_reads_2.fastqsanger" />
                        </collection>
                    </param>
                </conditional>
                <param name="header" value="header_only.sam" />
            </conditional>
        </conditional>
        <output name="outputname" ftype="bam" file="reads_1and2_w_header.bam" />
    </test>

    <!-- BAM to SAM, single output file -->
    <test expect_num_outputs="1">
        <conditional name="mode">
            <param name="iformat" value="bam" />
            <conditional name="output">
                <param name="oformat" value="sam" />
                <param name="input_data" value="a.bam" />
            </conditional>
        </conditional>
        <output name="outputname" ftype="sam" file="a.sam" />
    </test>

    <!-- SAM to BAM with read-group splitting: two collection elements are
         expected, the content of the first one is compared -->
    <test>
        <conditional name="mode">
            <param name="iformat" value="sam" />
            <conditional name="output">
                <param name="oformat" value="bam" />
                <param name="input_data" value="a.sam" />
                <param name="split_on_rgs" value="true" />
            </conditional>
        </conditional>
        <output_collection name="bam_split_on_read_groups" type="list" count="2">
            <element name="read_group_000" ftype="bam" file="a_part1.bam" />
        </output_collection>
    </test>
</tests>
393 | |
<!-- User-facing help, written in reStructuredText. The link targets near the
     end and the @HELP_FOOTER@ token are kept unchanged. -->
<help><![CDATA[
.. class:: infomark

**What it does**

The tool converts between different file formats used for storing
next-generation sequencing data.

As input file types it can handle fastq, SAM or BAM format. Input in fastq
format can be converted to SAM or BAM format; input in SAM or BAM format can
be converted to SAM, BAM, fastq or gzipped fastq format.

**Notes:**

1) The tool can convert fastq files representing data from paired-end
sequencing runs to appropriate SAM/BAM format provided that the mate
information is split over two fastq files in corresponding order.

**TIP:** If your paired-end data is arranged differently, you may look into
the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the
`Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can
convert your files to the expected format.

2) Merging partial fastq (or gzipped fastq) files into a single SAM/BAM file is
supported both for single-end and paired-end data. Simply add additional input
datasets and select the appropriate files (pairs of files in case of paired-end
data).

Concatenation of SAM/BAM files during conversion is currently not supported.

3) For input in fastq format a SAM header file providing run metadata
**has to be specified**. The information in this file will be used as the
header data of the new SAM/BAM file. You can use the *NGS Run Annotation* tool
to generate a new header file for your data.

For input in SAM/BAM format the tool will simply copy the existing header
data to the new file. To modify the header of an existing SAM/BAM file, use
the *Reheader BAM file* tool instead.

4) Conversions to fastq or gzipped fastq format always split the output by
read group and deliver it as dataset collections (one for single reads, one
for read pairs and, on request, one for multi-segment reads). For conversions
to SAM/BAM format, splitting the output by read group is optional.

.. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531
.. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy
.. _MiModD user guide: http://mimodd.readthedocs.org/en/latest

@HELP_FOOTER@
]]></help>
438 <expand macro="citations" /> | |
439 </tool> |