comparison catWrapper.xml @ 1:3a4694d4354f draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 618a7892f6af26278364a75ab23b3c6d8cdc73db
author artbio
date Wed, 20 Mar 2019 07:17:16 -0400
parents 6f54dc6b37da
children 1fe4d165ac0e
comparison
equal deleted inserted replaced
0:6f54dc6b37da 1:3a4694d4354f
1 <tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="0.3"> 1 <tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.0">
2 <description>tail-to-head</description> 2 <description>tail-to-head by specifying how</description>
3 <command><![CDATA[ 3 <command><![CDATA[
4 #if $headers == "No": 4 #if $headers == 0:
5 cat 5 #set $concat_command = "cat"
6 #for $file in $input
7 "$file"
8 #end for
9 > "$out_file1"
10 #else: 6 #else:
11 #for $file in $input 7 #set $concat_command = 'tail -q -n +'+ str(int($headers)+1)
12 printf "# ${file.element_identifier}\n" >> "$out_file1" && 8 #end if
13 cat "$file" >> "$out_file1" && 9 #if $global_condition.input_type == "singles":
14 #end for 10 #if $dataset_names == "No":
15 sleep 1 11 $concat_command
12 #for $file in $global_condition.inputs
13 '$file'
14 #end for
15 > '$out_file1'
16 #else:
17 #for $file in $global_condition.inputs
18 #if $file.ext[-2:] == "gz":
19 printf "# ${file.element_identifier}\n" | gzip -c >> '$out_file1' &&
20 gzip -dc "$file" | $concat_command |gzip -c >> '$out_file1' &&
21 #else:
22 printf "# ${file.element_identifier}\n" >> '$out_file1' &&
23 $concat_command "$file" >> '$out_file1' &&
24 #end if
25 #end for
26 sleep 1
27 #end if
28 #else if $global_condition.input_type == "paired_collection":
29 #if $global_condition.paired_cat_type == "by_strand":
30 #if $dataset_names == "No":
31 #for $file in $global_condition.inputs
32 $concat_command
33 $file['forward']
34 >> '$forward' &&
35 $concat_command
36 $file['reverse']
37 >> '$reverse' &&
38 #end for
39 sleep 1
40 #else:
41 #for $file in $global_condition.inputs.keys()
42 printf "# ${file}_forward\n" >> '$forward' &&
43 $concat_command
44 $global_condition.inputs[$file]['forward']
45 >> '$forward' &&
46 printf "# ${file}_reverse\n" >> '$reverse' &&
47 $concat_command
48 $global_condition.inputs[$file]['reverse']
49 >> '$reverse' &&
50 #end for
51 sleep 1
52 #end if
53 #else if $global_condition.paired_cat_type == "by_pair":
54 mkdir concatenated &&
55 #if $dataset_names == "No":
56 #for $file in $global_condition.inputs.keys()
57 $concat_command
58 $global_condition.inputs[$file]['forward']
59 > concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
60 $concat_command
61 $global_condition.inputs[$file]['reverse']
62 >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
63 #end for
64 sleep 1
65 #else:
66 #for $file in $global_condition.inputs.keys()
67 printf "# ${file}_forward\n" > concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
68 $concat_command
69 $global_condition.inputs[$file]['forward']
70 >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
71 printf "# ${file}_reverse\n" >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
72 $concat_command
73 $global_condition.inputs[$file]['reverse']
74 >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' &&
75 #end for
76 sleep 1
77 #end if
78 #else if $global_condition.paired_cat_type == "all":
79 #if $dataset_names == "No":
80 #for $file in $global_condition.inputs.keys()
81 $concat_command
82 $global_condition.inputs[$file]['forward']
83 >> $out_file1 &&
84 $concat_command
85 $global_condition.inputs[$file]['reverse']
86 >> $out_file1 &&
87 #end for
88 sleep 1
89 #else:
90 #for $file in $global_condition.inputs.keys()
91 printf "# ${file}_forward\n" > $out_file1 &&
92 $concat_command
93 $global_condition.inputs[$file]['forward']
94 >> $out_file1 &&
95 printf "# ${file}_reverse\n" >> $out_file1 &&
96 $concat_command
97 $global_condition.inputs[$file]['reverse']
98 >> $out_file1 &&
99 #end for
100 sleep 1
101 #end if
102 #end if
16 #end if 103 #end if
17 ]]> 104 ]]>
18 </command> 105 </command>
19 <inputs> 106 <inputs>
20 <param name="headers" type="select" label="include dataset names"> 107 <conditional name="global_condition">
21 <option value="No" selected="true">No</option> 108 <param name="input_type" type="select" label="What type of data do you wish to concatenate?" help="Depending on the type of input selected the concatenation options will differ">
22 <option value="Yes">Yes</option> 109 <option value="singles">Single datasets</option>
23 </param> 110 <option value="paired_collection">Paired collection</option>
24 <param name="input" type="data" label="Concatenate Dataset" multiple="True"/> 111 </param>
112 <when value="singles">
113 <param name="inputs" type="data" label="Concatenate Datasets" multiple="True" help="All inputed datasets will be concatenated tail-to-head."/>
114 </when>
115 <when value="paired_collection">
116 <param name="inputs" type="data_collection" collection_type="list:paired" label="Input paired collections to concatenate"/>
117 <param name="paired_cat_type" type="select" label="What type of concatenation do you wish to perform?">
118 <option value="by_strand">Concatenate all datsets of same strand (outputs a single pair of datasets)</option>
119 <option value="by_pair">Concatenate pairs of datasets (outputs an unpaired collection of datasets)</option>
120 <option value="all">Concatenate all datasets into a single file regardless of strand (outputs a single file)</option>
121 </param>
122 </when>
123 </conditional>
124 <param name="dataset_names" type="boolean" label="Include dataset names?" truevalue="Yes" falsevalue="No" checked="false" help="If 'Yes' is selected '#name of dataset' will be added when concatenating."/>
125 <param name="headers" type="integer" label="Number of lines to skip at the beginning of each concatenation:" value="0" help="This paremeter exists so as to not concatenate comments or headers contained at the start of the files."/>
25 </inputs> 126 </inputs>
26 <outputs> 127 <outputs>
27 <data name="out_file1" format_source="input" metadata_source="input"/> 128 <data name="out_file1" format_source="inputs" metadata_source="inputs" label="Concatenated datasets">
129 <filter>global_condition['input_type'] == 'singles' or (global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all')</filter>
130 </data>
131 <collection name="paired_output" type="paired" label="Concatenation by strtand">
132 <data name="forward" />
133 <data name="reverse" />
134 <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand'</filter>
135 </collection>
136 <collection name="list_output" type="list" label="Concatenation by pairs">
137 <discover_datasets pattern="(?P&lt;name&gt;.*)\.(?P&lt;ext&gt;[^\._]+\.?[^\._])\.listed" visible="false" directory="concatenated"/>
138 <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair'</filter>
139 </collection>
28 </outputs> 140 </outputs>
29 <tests> 141 <tests>
30 <test> 142 <!-- Single files concatenation -->
31 <param name="headers" value="No" /> 143 <test> <!-- Test 2 single files concatenation with no other option -->
32 <param name="input" value="1.bed,2.bed"/> 144 <param name="input_type" value="singles" />
145 <param name="inputs" value="1.bed,2.bed"/>
146 <param name="dataset_names" value="No" />
147 <param name="headers" value="0" />
33 <output name="out_file1" file="cat_wrapper_out1.bed"/> 148 <output name="out_file1" file="cat_wrapper_out1.bed"/>
34 </test> 149 </test>
35 <test> 150 <test> <!-- Test 2 single files concatenation with dataset names activated -->
36 <param name="headers" value="Yes" /> 151 <param name="input_type" value="singles" />
37 <param name="input" value="1.bed,2.bed"/> 152 <param name="inputs" value="1.bed,2.bed"/>
153 <param name="dataset_names" value="Yes" />
154 <param name="headers" value="0" />
38 <output name="out_file1" file="cat_wrapper_out2.bed"/> 155 <output name="out_file1" file="cat_wrapper_out2.bed"/>
39 </test> 156 </test>
40 157 <test> <!-- Test 2 single files concatenation skipping 1 line -->
158 <param name="input_type" value="singles" />
159 <param name="inputs" value="1.bed,2.bed"/>
160 <param name="dataset_names" value="No" />
161 <param name="headers" value="1" />
162 <output name="out_file1" file="cat_wrapper_out3.bed"/>
163 </test>
164 <test> <!-- Test gz handling with no options -->
165 <param name="input_type" value="singles" />
166 <param name="inputs" value="1_f.fastq.gz,1_r.fastq.gz"/>
167 <param name="dataset_names" value="No" />
168 <param name="headers" value="0" />
169 <output name="out_file1" file="1.fastq.gz" decompress="True"/>
170 </test>
171 <test> <!-- Test gz handling with options -->
172 <param name="input_type" value="singles" />
173 <param name="inputs" value="1_f.fastq.gz,1_r.fastq.gz"/>
174 <param name="dataset_names" value="Yes" />
175 <param name="headers" value="4" />
176 <output name="out_file1" file="1_options.fastq.gz" decompress="True"/>
177 </test>
178 <!-- Test paired options -->
179 <test> <!-- Test paired collection concatenation by_pair with no other option -->
180 <param name="input_type" value="paired_collection" />
181 <param name="paired_cat_type" value="by_pair"/>
182 <param name="inputs">
183 <collection type="list:paired">
184 <element name="2">
185 <collection type="paired">
186 <element name="forward" value="2_f.fastq"/>
187 <element name="reverse" value="2_r.fastq"/>
188 </collection>
189 </element>
190 <element name="3">
191 <collection type="paired">
192 <element name="forward" value="3_f.fastq"/>
193 <element name="reverse" value="3_r.fastq"/>
194 </collection>
195 </element>
196 <element name="4">
197 <collection type="paired">
198 <element name="forward" value="4_f.fastq"/>
199 <element name="reverse" value="4_r.fastq"/>
200 </collection>
201 </element>
202 </collection>
203 </param>
204 <param name="dataset_names" value="No" />
205 <param name="headers" value="0" />
206 <output_collection name="list_output" type="list" >
207 <element name="2" file="2.fastq"/>
208 <element name="3" file="3.fastq"/>
209 <element name="4" file="4.fastq"/>
210 </output_collection>
211 </test>
212 <test> <!-- Test paired collection concatenation by_strand with no other option -->
213 <param name="input_type" value="paired_collection" />
214 <param name="paired_cat_type" value="by_strand"/>
215 <param name="inputs">
216 <collection type="list:paired">
217 <element name="2">
218 <collection type="paired">
219 <element name="forward" value="2_f.fastq"/>
220 <element name="reverse" value="2_r.fastq"/>
221 </collection>
222 </element>
223 <element name="3">
224 <collection type="paired">
225 <element name="forward" value="3_f.fastq"/>
226 <element name="reverse" value="3_r.fastq"/>
227 </collection>
228 </element>
229 <element name="4">
230 <collection type="paired">
231 <element name="forward" value="4_f.fastq"/>
232 <element name="reverse" value="4_r.fastq"/>
233 </collection>
234 </element>
235 </collection>
236 </param>
237 <param name="dataset_names" value="No" />
238 <param name="headers" value="0" />
239 <output_collection name="paired_output" type="paired" >
240 <element name="forward" file="f.fastq"/>
241 <element name="reverse" file="r.fastq"/>
242 </output_collection>
243 </test>
41 </tests> 244 </tests>
42 <help> 245 <help>
43 246
44 .. class:: warningmark 247 .. class:: warningmark
45 248
46 **WARNING:** This tool does not check if the datasets being concatenated are in the same format. 249 **WARNING:** This tool does not check if the datasets being concatenated are in the same format.
47 250
251 **WARNING:** The paired collection operations do not handle gzipped files.
252
48 ----- 253 -----
49 254
50 **What it does** 255 **What it does**
51 256
52 Concatenates datasets 257 Concatenates datasets and paired collections with multiple options:
53 258
54 ----- 259 - When the input is a paired collection, it is possible to select either a concatenation by strand, a concatenation by pair, or a concatenation of the whole collection.
55 260
56 **Example** 261 - Skipping lines before concatenation to avoid headers
262
263 - Adding the names of the concatenated files as separators
264
265 -----
266
267 **Single datasets concatenation example**
57 268
58 Concatenating Dataset:: 269 Concatenating Dataset::
59 270
60 chrX 151087187 151087355 A 0 - 271 chrX 151087187 151087355 A 0 -
61 chrX 151572400 151572481 B 0 + 272 chrX 151572400 151572481 B 0 +
81 chr2 100000030 200000955 P 0 + 292 chr2 100000030 200000955 P 0 +
82 chr2 100000015 200000999 Q 0 + 293 chr2 100000015 200000999 Q 0 +
83 294
84 ----- 295 -----
85 296
297 **Paired collection concatenation example**
298
299 1st pair::
300
301 forward - reverse
302
303 2nd pair::
304
305 forward - reverse
306
307 Concatenation by strand::
308
309 concatenates:
310
311 1st forward + 2nd forward
312 1st reverse + 2nd reverse
313
314 outputs:
315
316 1 pair
317
318 Concatenation by pair::
319
320 concatenates:
321
322 1st forward + 1st reverse
323 2nd forward + 2nd reverse
324
325 outputs:
326
327 2 datasets
328
329 Concatenate all::
330
331 concatenates:
332
333 1st forward + 1st reverse + 2nd forward + 2nd reverse
334
335 outputs:
336
337 1 dataset
338
339 -----
340
341 **Selecting "Include dataset names" when concatenating files**:
342
343 1st file name="first_tabular"::
344
345 chrX 151087187 151087355 A 0 -
346 chrX 151572400 151572481 B 0 +
347
348 2nd file name="second_tabular"::
349
350 chr1 151242630 151242955 X 0 +
351 chr1 151271715 151271999 Y 0 +
352 chr1 151278832 151279227 Z 0 -
353
354 output::
355
356 # first_tabular
357 chrX 151087187 151087355 A 0 -
358 chrX 151572400 151572481 B 0 +
359 # second_tabular
360 chr1 151242630 151242955 X 0 +
361 chr1 151271715 151271999 Y 0 +
362 chr1 151278832 151279227 Z 0 -
363
364 -----
365
366 **Skipping lines**
367
368 1st file::
369
370 chrX 151087187 151087355 A 0 -
371 chrX 151572400 151572481 B 0 +
372
373 2nd file::
374
375 chr1 151242630 151242955 X 0 +
376 chr1 151271715 151271999 Y 0 +
377 chr1 151278832 151279227 Z 0 -
378
379 skipping 1 line
380
381 output::
382
383 chrX 151572400 151572481 B 0 +
384 chr1 151271715 151271999 Y 0 +
385 chr1 151278832 151279227 Z 0 -
386
387 -----
388
86 Adapted from galaxy's catWrapper.xml to allow multiple input files. 389 Adapted from galaxy's catWrapper.xml to allow multiple input files.
87 390
88 </help> 391 </help>
89 </tool> 392 </tool>