comparison convert.xml @ 0:f0f2795de2c7 draft

planemo upload for repository https://github.com/wm75/mimodd_galaxy_wrappers commit 528bcf3b769c7c73f119b2a176d19071f9ef5312
author wolma
date Tue, 19 Dec 2017 04:54:04 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f0f2795de2c7
1 <tool id="mimodd_convert" name="MiModD Convert" version="@MIMODD_WRAPPER_VERSION@">
2 <description>converts sequence data into different formats</description>
3 <macros> <!-- Reusable fragments that the inputs section pulls in through expand elements. -->
4 <import>macros.xml</import> <!-- External macro file, not shown here; presumably it defines the requirements, stdio, version_command and citations macros that this tool expands (confirm in macros.xml). -->
5 <macro name="to_format"> <!-- Output format selector: always offers sam and bam; an expanding element may contribute extra options through the yield below. -->
6 <param name="oformat" type="select" label="to">
7 <option value="sam">sam</option>
8 <option value="bam">bam</option>
9 <yield /> <!-- Placeholder for the child elements of the expand tag (the sam and bam input modes add fastq and gz here). -->
10 </param>
11 </macro>
12 <macro name="se_selector" token_format="fastq"> <!-- Single-end fastq input: either a repeat of individual datasets or one list collection, followed by the SAM header dataset. The format token defaults to fastq. -->
13 <conditional name="input">
14 <param name="repr" type="select"
15 label="Single-end input data provided as">
16 <option value="individual">Individual datasets</option>
17 <option value="collection">Collection of datasets</option>
18 </param>
19 <when value="individual"> <!-- One or more datasets; each repeat entry exposes file1 to the command template. -->
20 <repeat name="input_data" title="fastq input datasets"
21 default="1" min="1">
22 <param name="file1" type="data" format="@FORMAT@"
23 label="single-end read data"/>
24 </repeat>
25 </when>
26 <when value="collection"> <!-- The accepted formats are fixed to fastq and fastq.gz here instead of using the format token; the command template inspects the elements to detect gzipped input. -->
27 <param name="input_data" type="data_collection"
28 collection_type="list" format="fastq, fastq.gz"
29 label="collection of single-end read input datasets" />
30 </when>
31 </conditional>
32 <param name="header" type="data" format="sam" label="Use Header File"
33 help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/> <!-- Header dataset handed to the converter for fastq input. -->
34
35 </macro>
36 <macro name="pe_selector" token_format="fastq"> <!-- Paired-end fastq input: individual file pairs, one paired collection, or a list of pairs, followed by the SAM header dataset. -->
37 <conditional name="input">
38 <param name="repr" type="select"
39 label="Paired-end input data provided as">
40 <option value="individual">Individual datasets</option>
41 <option value="collection">Paired collection</option>
42 <option value="list_of_pairs">List of pairs</option>
43 </param>
44 <when value="individual"> <!-- Each repeat entry supplies one file1/file2 pair. -->
45 <repeat name="input_data" title="fastq input datasets"
46 default="1" min="1">
47 <param name="file1" type="data" format="@FORMAT@"
48 label="first set of reads of paired-end data"/>
49 <param name="file2" type="data" format="@FORMAT@"
50 label="second set of reads of paired-end data"/>
51 </repeat>
52 </when>
53 <when value="collection"> <!-- A single paired collection; the command template reads its forward and reverse elements. -->
54 <param name="input_data" type="data_collection"
55 collection_type="paired" format="fastq, fastq.gz"
56 label="paired input dataset collection" />
57 </when>
58 <when value="list_of_pairs"> <!-- A nested list:paired collection; the command template iterates over the pairs. -->
59 <param name="input_data" type="data_collection"
60 collection_type="list:paired" format="fastq, fastq.gz"
61 label="nested collection of paired input datasets" />
62 </when>
63 </conditional>
64 <param name="header" type="data" format="sam" label="Use Header File"
65 help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/> <!-- Header dataset handed to the converter for fastq input. -->
66
67 </macro>
68 <macro name="sam_bam_selector" token_format="sam"> <!-- Single SAM or BAM input dataset; the expanding element picks the datatype through the format token. -->
69 <param name="input_data" type="data" format="@FORMAT@"
70 label="input dataset"/>
71 <param name="header" type="hidden" value="None"/> <!-- Sentinel: the command template compares the header value with the string None and omits the header option when they match. -->
72 </macro>
73 <macro name="fastq_output_choices"> <!-- Options shown when converting SAM/BAM to fastq or gzipped fastq; the output filters read reads_to_report and multisegment_report. -->
74 <param name="split_on_rgs" type="hidden" value=""/> <!-- Keeps split_on_rgs defined (as an empty string) for the command template in the fastq output branches. -->
75 <param name="reads_to_report" type="select" display="radio"
76 label="Types of reads to generate output for"
77 help="By default, the tool will generate two dataset collections, one for single reads found in the input file, and one for paired reads. If you know, in advance, that the input contains only single or only paired reads, you can prevent the generation of an empty dataset collection by selecting the appropriate option here.">
78 <option value="default">Single and paired reads</option>
79 <option value="single">Single reads only</option>
80 <option value="paired">Paired reads only</option>
81 </param>
82 <param name="multisegment_report" type="boolean" checked="false"
83 label="Generate additional output for multi-segment reads"
84 help="Multi-segment reads, as opposed to single- and two-segment/paired reads, are not present in typical NGS data, but allowed in SAM/BAM files. If you expect multi-segment reads in the input file, enable this option." />
85 </macro>
86 <macro name="bam_output_choices"> <!-- Options shown when converting SAM/BAM to SAM or BAM: optional splitting by read group; the truevalue is inserted verbatim into the command line. -->
87 <param name="split_on_rgs" type="boolean" truevalue="--split-on-rgs" falsevalue="" checked="false"
88 label="Split output based on read group IDs"
89 help="If the input file contains reads from different read groups, write them to separate output files; implied automatically for conversions to fastq and gzipped fastq format"/>
90 </macro>
91 </macros>
92 <expand macro="requirements" /> <!-- Not defined among this file's local macros; presumably provided by macros.xml. -->
93 <expand macro="stdio" /> <!-- Not defined among this file's local macros; presumably provided by macros.xml. -->
94 <expand macro="version_command" /> <!-- Not defined among this file's local macros; presumably provided by macros.xml. -->
95 <command><![CDATA[
96 ## Currently Galaxy does not autoconvert collections of fastq.gz files.
97 ## This tool wrapper fixes that by allowing fastq and fastq.gz as input
98 ## collection formats.
99 ## gz_input is then used as flag to indicate a fastq.gz input file
100 #set gz_input = False
101 #if $str($mode.output.split_on_rgs) or $str($mode.output.oformat) == "fastq" or $str($mode.output.oformat) == "gz":
102 mkdir converted_data &&
103 #end if
104 ## Build the converter call: positional input files first, then the options.
105 mimodd convert
106 #if $str($mode.iformat) in ("sam", "bam"):
107 '${mode.output.input_data}'
108 #else if $str($mode.iformat) == "fastq_pe":
109 #if $str($mode.output.input.repr) == "collection":
110 '$mode.output.input.input_data.forward' '$mode.output.input.input_data.reverse'
111 ## A paired collection - if the forward dataset is gzipped we assume
112 ## the reverse dataset is too.
113 #if $mode.output.input.input_data.forward.is_of_type('fastq.gz'):
114 #set gz_input = True
115 #end if
116 #else
117 #for $i in $mode.output.input.input_data
118 #if $str($mode.output.input.repr) == "individual":
119 '${i.file1}' '${i.file2}'
120 #else
121 '$i.forward' '$i.reverse'
122 ## A list:paired collection - a gzipped forward dataset in any
123 ## pair marks the whole input as gzipped (the flag is never reset)
124 #if $i.forward.is_of_type('fastq.gz'):
125 #set gz_input = True
126 #end if
127 #end if
128 #end for
129 #end if
130 #else
131 #for $i in $mode.output.input.input_data
132 #if $str($mode.output.input.repr) == "collection":
133 '$i'
134 ## A simple collection of files - any gzipped element makes us
135 ## assume gzipped input for all of them (the flag is never reset)
136 #if $i.is_of_type('fastq.gz'):
137 #set gz_input = True
138 #end if
139 #else
140 '${i.file1}'
141 #end if
142 #end for
143 #end if
144 #if $str($mode.output.header) != "None":
145 --header '$(mode.output.header)'
146 #end if
147 #if $str($outputname) == "None":
148 --ofile converted_data/read_group
149 #else
150 --ofile '$outputname'
151 #end if
152 #if $gz_input:
153 ## a gzipped input dataset was found so lets set --iformat accordingly
154 #if $str($mode.iformat) == "fastq_pe":
155 --iformat gz_pe
156 #else
157 --iformat gz
158 #end if
159 #else
160 --iformat $(mode.iformat)
161 #end if
162 --oformat $(mode.output.oformat)
163 ${mode.output.split_on_rgs}
164 ## Rename paired fastq results from _r1/_r2 to _forward/_reverse, the names the list:paired output discovery pattern matches.
165 #if $str($mode.output.oformat) == "fastq" or $str($mode.output.oformat) == "gz":
166 &&
167 cd converted_data &&
168 for f in *2segments_r1.fastq.gz; do [ -f "\$f" ] || continue; mv "\$f" "\$(basename "\$f" _r1.fastq.gz)_forward.fastq.gz"; done &&
169 for f in *2segments_r2.fastq.gz; do [ -f "\$f" ] || continue; mv "\$f" "\$(basename "\$f" _r2.fastq.gz)_reverse.fastq.gz"; done &&
170 for f in *2segments_r1.fastq; do [ -f "\$f" ] || continue; mv "\$f" "\$(basename "\$f" _r1.fastq)_forward.fastq"; done &&
171 for f in *2segments_r2.fastq; do [ -f "\$f" ] || continue; mv "\$f" "\$(basename "\$f" _r2.fastq)_reverse.fastq"; done
172 #end if
173
174 ]]></command>
175
176 <inputs> <!-- Two nested conditionals: mode (input format) and, inside each branch, output (target format). The command template reads values as mode.iformat and mode.output.*. -->
177 <conditional name="mode">
178 <param name="iformat" type="select" label="Convert from"
179 help="Your choice will update the interface to display further choices appropriate for your type of input data.">
180 <option value="fastq">fastq: single-end (one file)</option>
181 <option value="fastq_pe">fastq: paired-end (two files)</option>
182 <option value="sam">sam</option>
183 <option value="bam">bam</option>
184 </param>
185 <when value="fastq"> <!-- Single-end fastq input: only sam and bam are offered as targets. -->
186 <conditional name="output">
187 <expand macro="to_format" />
188 <when value="sam">
189 <expand macro="se_selector" format="fastq" />
190 <param name="split_on_rgs" type="hidden" value=""/> <!-- Empty placeholder so that mode.output.split_on_rgs exists for the command template and the output filters. -->
191 </when>
192 <when value="bam">
193 <expand macro="se_selector" format="fastq" />
194 <param name="split_on_rgs" type="hidden" value=""/> <!-- Same empty placeholder as in the sam branch. -->
195 </when>
196 </conditional>
197 </when>
198 <when value="fastq_pe"> <!-- Paired-end fastq input: only sam and bam are offered as targets. -->
199 <conditional name="output">
200 <expand macro="to_format" />
201 <when value="sam">
202 <expand macro="pe_selector" format="fastq" />
203 <param name="split_on_rgs" type="hidden" value=""/> <!-- Empty placeholder, as for single-end input. -->
204 </when>
205 <when value="bam">
206 <expand macro="pe_selector" format="fastq" />
207 <param name="split_on_rgs" type="hidden" value=""/> <!-- Empty placeholder, as for single-end input. -->
208 </when>
209 </conditional>
210 </when>
211 <when value="sam"> <!-- SAM input: the target list is extended with fastq and gzipped fastq. -->
212 <conditional name="output">
213 <expand macro="to_format">
214 <option value="fastq">fastq</option>
215 <option value="gz">gzipped fastq</option>
216 </expand>
217 <when value="fastq"> <!-- fastq targets get the read-type choices. -->
218 <expand macro="sam_bam_selector" format="sam" />
219 <expand macro="fastq_output_choices" />
220 </when>
221 <when value="gz">
222 <expand macro="sam_bam_selector" format="sam" />
223 <expand macro="fastq_output_choices" />
224 </when>
225 <when value="bam"> <!-- sam and bam targets get the optional read-group split. -->
226 <expand macro="sam_bam_selector" format="sam" />
227 <expand macro="bam_output_choices" />
228 </when>
229 <when value="sam">
230 <expand macro="sam_bam_selector" format="sam" />
231 <expand macro="bam_output_choices" />
232 </when>
233 </conditional>
234 </when>
235 <when value="bam"> <!-- BAM input: same layout as the SAM branch, with the input datatype set to bam. -->
236 <conditional name="output">
237 <expand macro="to_format">
238 <option value="fastq">fastq</option>
239 <option value="gz">gzipped fastq</option>
240 </expand>
241 <when value="fastq">
242 <expand macro="sam_bam_selector" format="bam" />
243 <expand macro="fastq_output_choices" />
244 </when>
245 <when value="gz">
246 <expand macro="sam_bam_selector" format="bam" />
247 <expand macro="fastq_output_choices" />
248 </when>
249 <when value="bam">
250 <expand macro="sam_bam_selector" format="bam" />
251 <expand macro="bam_output_choices" />
252 </when>
253 <when value="sam">
254 <expand macro="sam_bam_selector" format="bam" />
255 <expand macro="bam_output_choices" />
256 </when>
257 </conditional>
258 </when>
259 </conditional>
260 </inputs>
261
262 <outputs> <!-- One plain dataset plus four collections; the filters select which of them exist for a given target format and option set. -->
263 <data name="outputname" format="bam"
264 label="Reads converted to ${mode.output.oformat} by ${tool.name} on ${on_string}">
265 <change_format> <!-- Declared as bam; switched to sam when sam is the chosen target. -->
266 <when input="mode.output.oformat" value="sam" format="sam" />
267 </change_format>
268 <filter> <!-- Single-file result: sam or bam target without read-group splitting. -->
269 (mode['output']['oformat'] in ("bam", "sam") and not mode['output']['split_on_rgs'])
270 </filter>
271 </data>
272 <collection name="bam_split_on_read_groups" type="list"
273 label="Reads converted to ${mode.output.oformat} by ${tool.name} on ${on_string}">
274 <discover_datasets pattern="__designation_and_ext__"
275 directory="converted_data" /> <!-- Collects the files that the command writes into converted_data. -->
276 <filter> <!-- Only for sam or bam targets with read-group splitting enabled. -->
277 (mode['output']['oformat'] in ('bam', 'sam') and mode['output']['split_on_rgs'])
278 </filter>
279 </collection>
280 <collection name="fastq_SE_output_split_on_read_groups" type="list"
281 label="Single reads converted to fastq by ${tool.name} on ${on_string}">
282 <discover_datasets
283 pattern="(?P&lt;designation&gt;.+)_1segments_r1\.(?P&lt;ext&gt;fastq(\.gz)*)"
284 directory="converted_data" /> <!-- Matches the 1segments_r1 files; the part before that suffix becomes the element name. -->
285 <filter> <!-- fastq or gz target, unless the user asked for paired reads only. -->
286 (mode['output']['oformat'] in ('fastq', 'gz') and mode['output']['reads_to_report'] in ('default', 'single'))
287 </filter>
288 </collection>
289 <collection name="fastq_PE_reads_split_on_read_groups" type="list:paired"
290 label="Read pairs converted to fastq by ${tool.name} on ${on_string}">
291 <discover_datasets
292 pattern="(?P&lt;identifier_0&gt;read_group_.+)_2segments_(?P&lt;identifier_1&gt;(forward|reverse))\.(?P&lt;ext&gt;fastq(\.gz)*)"
293 directory="converted_data" /> <!-- Relies on the forward/reverse file names produced by the rename loops at the end of the command template. -->
294 <filter> <!-- fastq or gz target, unless the user asked for single reads only. -->
295 (mode['output']['oformat'] in ('fastq', 'gz') and mode['output']['reads_to_report'] in ('default', 'paired'))
296 </filter>
297 </collection>
298 <collection name="fastq_multi-segment_reads_split_on_read_groups" type="list:list"
299 label="Multi-segment reads converted to fastq by ${tool.name} on ${on_string}">
300 <discover_datasets
301 pattern="(?P&lt;identifier_0&gt;read_group_.+)_(?P&lt;identifier_1&gt;[3-9]|[1-9][0-9]+)segments_r[0-9]+\.(?P&lt;ext&gt;fastq(\.gz)*)"
302 directory="converted_data" /> <!-- Outer identifier is the read group part, inner identifier the segment count (3 or more). -->
303 <filter> <!-- fastq or gz target with the multi-segment option enabled. -->
304 (mode['output']['oformat'] in ('fastq', 'gz') and mode['output']['multisegment_report'])
305 </filter>
306 </collection>
307 </outputs>
308
309 <tests> <!-- Five functional tests: three fastq to bam conversions, one bam to sam conversion, one read-group split. -->
310 <test expect_num_outputs="1"> <!-- Single-end fastq, given as an individual dataset, to bam with a header file. -->
311 <conditional name="mode">
312 <param name="iformat" value="fastq" />
313 <conditional name="output">
314 <param name="oformat" value="bam" />
315 <conditional name="input">
316 <param name="repr" value="individual" />
317 <repeat name="input_data">
318 <param name="file1"
319 value="split_pair_reads_1.fastqsanger" />
320 </repeat>
321 </conditional>
322 <param name="header" value="header_only.sam" />
323 </conditional>
324 </conditional>
325 <output name="outputname" file="reads_1_w_header.bam"
326 ftype="bam" />
327 </test>
328 <test expect_num_outputs="1"> <!-- Paired-end fastq, given as an individual file pair, to bam with a header file. -->
329 <conditional name="mode">
330 <param name="iformat" value="fastq_pe" />
331 <conditional name="output">
332 <param name="oformat" value="bam" />
333 <conditional name="input">
334 <param name="repr" value="individual" />
335 <repeat name="input_data">
336 <param name="file1"
337 value="split_pair_reads_1.fastqsanger" />
338 <param name="file2"
339 value="split_pair_reads_2.fastqsanger" />
340 </repeat>
341 </conditional>
342 <param name="header" value="header_only.sam" />
343 </conditional>
344 </conditional>
345 <output name="outputname" file="reads_1and2_w_header.bam"
346 ftype="bam" />
347 </test>
348 <test expect_num_outputs="1"> <!-- Same data as the previous test, supplied as a paired collection; the same result file is expected. -->
349 <conditional name="mode">
350 <param name="iformat" value="fastq_pe" />
351 <conditional name="output">
352 <param name="oformat" value="bam" />
353 <conditional name="input">
354 <param name="repr" value="collection" />
355 <param name="input_data">
356 <collection type="paired">
357 <element name="forward"
358 value="split_pair_reads_1.fastqsanger" />
359 <element name="reverse"
360 value="split_pair_reads_2.fastqsanger" />
361 </collection>
362 </param>
363 </conditional>
364 <param name="header" value="header_only.sam" />
365 </conditional>
366 </conditional>
367 <output name="outputname" file="reads_1and2_w_header.bam" ftype="bam" />
368 </test>
369 <test expect_num_outputs="1"> <!-- bam to sam; exercises the change_format rule on the outputname dataset. -->
370 <conditional name="mode">
371 <param name="iformat" value="bam" />
372 <conditional name="output">
373 <param name="oformat" value="sam" />
374 <param name="input_data" value="a.bam" />
375 </conditional>
376 </conditional>
377 <output name="outputname" file="a.sam" ftype="sam" />
378 </test>
379 <test> <!-- sam to bam with read-group splitting; unlike the other tests this one sets no expect_num_outputs. -->
380 <conditional name="mode">
381 <param name="iformat" value="sam" />
382 <conditional name="output">
383 <param name="oformat" value="bam" />
384 <param name="input_data" value="a.sam" />
385 <param name="split_on_rgs" value="true" />
386 </conditional>
387 </conditional>
388 <output_collection name="bam_split_on_read_groups" type="list" count="2"> <!-- Two elements are expected, but only the content of read_group_000 is compared. -->
389 <element name="read_group_000" file="a_part1.bam" ftype="bam" />
390 </output_collection>
391 </test>
392 </tests>
393
394 <help><![CDATA[
395 .. class:: infomark
396
397 **What it does**
398
399 The tool converts between different file formats used for storing
400 next-generation sequencing data.
401
402 As input file types it can handle fastq, SAM or BAM format. Fastq input can
403 be converted to SAM or BAM; SAM/BAM input to SAM, BAM, fastq or gzipped fastq.
404
405 **Notes:**
406
407 1) The tool can convert fastq files representing data from paired-end
408 sequencing runs to appropriate SAM/BAM format provided that the mate
409 information is split over two fastq files in corresponding order.
410
411 **TIP:** If your paired-end data is arranged differently, you may look into
412 the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the
413 `Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can
414 convert your files to the expected format.
415
416 2) Merging partial fastq (or gzipped fastq) files into a single SAM/BAM file is
417 supported both for single-end and paired-end data. Simply add additional input
418 datasets and select the appropriate files (pairs of files in case of paired-end
419 data).
420
421 Concatenation of SAM/BAM files during conversion is currently not supported.
422
423 3) For input in fastq format a SAM header file providing run metadata
424 **has to be specified**. The information in this file will be used as the
425 header data of the new SAM/BAM file. You can use the *NGS Run Annotation* tool
426 to generate a new header file for your data.
427
428 For input in SAM/BAM format the tool will simply copy the existing header
429 data to the new file. To modify the header of an existing SAM/BAM file, use
430 the *Reheader BAM file* tool instead.
431
432 .. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531
433 .. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy
434 .. _MiModD user guide: http://mimodd.readthedocs.org/en/latest
435
436 @HELP_FOOTER@
437 ]]></help>
438 <expand macro="citations" />
439 </tool>