Mercurial > repos > artbio > concatenate_multiple_datasets
comparison catWrapper.xml @ 1:3a4694d4354f draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 618a7892f6af26278364a75ab23b3c6d8cdc73db
author | artbio |
---|---|
date | Wed, 20 Mar 2019 07:17:16 -0400 |
parents | 6f54dc6b37da |
children | 1fe4d165ac0e |
comparison
equal
deleted
inserted
replaced
0:6f54dc6b37da | 1:3a4694d4354f |
---|---|
1 <tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="0.3"> | 1 <tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.0"> |
2 <description>tail-to-head</description> | 2 <description>tail-to-head by specifying how</description> |
3 <command><![CDATA[ | 3 <command><![CDATA[ |
4 #if $headers == "No": | 4 #if $headers == 0: |
5 cat | 5 #set $concat_command = "cat" |
6 #for $file in $input | |
7 "$file" | |
8 #end for | |
9 > "$out_file1" | |
10 #else: | 6 #else: |
11 #for $file in $input | 7 #set $concat_command = 'tail -q -n +'+ str(int($headers)+1) |
12 printf "# ${file.element_identifier}\n" >> "$out_file1" && | 8 #end if |
13 cat "$file" >> "$out_file1" && | 9 #if $global_condition.input_type == "singles": |
14 #end for | 10 #if $dataset_names == "No": |
15 sleep 1 | 11 $concat_command |
12 #for $file in $global_condition.inputs | |
13 '$file' | |
14 #end for | |
15 > '$out_file1' | |
16 #else: | |
17 #for $file in $global_condition.inputs | |
18 #if $file.ext[-2:] == "gz": | |
19 printf "# ${file.element_identifier}\n" | gzip -c >> '$out_file1' && | |
20 gzip -dc "$file" | $concat_command |gzip -c >> '$out_file1' && | |
21 #else: | |
22 printf "# ${file.element_identifier}\n" >> '$out_file1' && | |
23 $concat_command "$file" >> '$out_file1' && | |
24 #end if | |
25 #end for | |
26 sleep 1 | |
27 #end if | |
28 #else if $global_condition.input_type == "paired_collection": | |
29 #if $global_condition.paired_cat_type == "by_strand": | |
30 #if $dataset_names == "No": | |
31 #for $file in $global_condition.inputs | |
32 $concat_command | |
33 $file['forward'] | |
34 >> '$forward' && | |
35 $concat_command | |
36 $file['reverse'] | |
37 >> '$reverse' && | |
38 #end for | |
39 sleep 1 | |
40 #else: | |
41 #for $file in $global_condition.inputs.keys() | |
42 printf "# ${file}_forward\n" >> '$forward' && | |
43 $concat_command | |
44 $global_condition.inputs[$file]['forward'] | |
45 >> '$forward' && | |
46 printf "# ${file}_reverse\n" >> '$reverse' && | |
47 $concat_command | |
48 $global_condition.inputs[$file]['reverse'] | |
49 >> '$reverse' && | |
50 #end for | |
51 sleep 1 | |
52 #end if | |
53 #else if $global_condition.paired_cat_type == "by_pair": | |
54 mkdir concatenated && | |
55 #if $dataset_names == "No": | |
56 #for $file in $global_condition.inputs.keys() | |
57 $concat_command | |
58 $global_condition.inputs[$file]['forward'] | |
59 > concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
60 $concat_command | |
61 $global_condition.inputs[$file]['reverse'] | |
62 >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
63 #end for | |
64 sleep 1 | |
65 #else: | |
66 #for $file in $global_condition.inputs.keys() | |
67 printf "# ${file}_forward\n" > concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
68 $concat_command | |
69 $global_condition.inputs[$file]['forward'] | |
70 >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
71 printf "# ${file}_reverse\n" >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
72 $concat_command | |
73 $global_condition.inputs[$file]['reverse'] | |
74 >> concatenated/'${file}.${global_condition.inputs[$file]['reverse'].ext}.listed' && | |
75 #end for | |
76 sleep 1 | |
77 #end if | |
78 #else if $global_condition.paired_cat_type == "all": | |
79 #if $dataset_names == "No": | |
80 #for $file in $global_condition.inputs.keys() | |
81 $concat_command | |
82 $global_condition.inputs[$file]['forward'] | |
83 >> $out_file1 && | |
84 $concat_command | |
85 $global_condition.inputs[$file]['reverse'] | |
86 >> $out_file1 && | |
87 #end for | |
88 sleep 1 | |
89 #else: | |
90 #for $file in $global_condition.inputs.keys() | |
91 printf "# ${file}_forward\n" > $out_file1 && | |
92 $concat_command | |
93 $global_condition.inputs[$file]['forward'] | |
94 >> $out_file1 && | |
95 printf "# ${file}_reverse\n" >> $out_file1 && | |
96 $concat_command | |
97 $global_condition.inputs[$file]['reverse'] | |
98 >> $out_file1 && | |
99 #end for | |
100 sleep 1 | |
101 #end if | |
102 #end if | |
16 #end if | 103 #end if |
17 ]]> | 104 ]]> |
18 </command> | 105 </command> |
19 <inputs> | 106 <inputs> |
20 <param name="headers" type="select" label="include dataset names"> | 107 <conditional name="global_condition"> |
21 <option value="No" selected="true">No</option> | 108 <param name="input_type" type="select" label="What type of data do you wish to concatenate?" help="Depending on the type of input selected the concatenation options will differ"> |
22 <option value="Yes">Yes</option> | 109 <option value="singles">Single datasets</option> |
23 </param> | 110 <option value="paired_collection">Paired collection</option> |
24 <param name="input" type="data" label="Concatenate Dataset" multiple="True"/> | 111 </param> |
112 <when value="singles"> | |
113 <param name="inputs" type="data" label="Concatenate Datasets" multiple="True" help="All inputed datasets will be concatenated tail-to-head."/> | |
114 </when> | |
115 <when value="paired_collection"> | |
116 <param name="inputs" type="data_collection" collection_type="list:paired" label="Input paired collections to concatenate"/> | |
117 <param name="paired_cat_type" type="select" label="What type of concatenation do you wish to perform?"> | |
118 <option value="by_strand">Concatenate all datsets of same strand (outputs a single pair of datasets)</option> | |
119 <option value="by_pair">Concatenate pairs of datasets (outputs an unpaired collection of datasets)</option> | |
120 <option value="all">Concatenate all datasets into a single file regardless of strand (outputs a single file)</option> | |
121 </param> | |
122 </when> | |
123 </conditional> | |
124 <param name="dataset_names" type="boolean" label="Include dataset names?" truevalue="Yes" falsevalue="No" checked="false" help="If 'Yes' is selected '#name of dataset' will be added when concatenating."/> | |
125 <param name="headers" type="integer" label="Number of lines to skip at the beginning of each concatenation:" value="0" help="This paremeter exists so as to not concatenate comments or headers contained at the start of the files."/> | |
25 </inputs> | 126 </inputs> |
26 <outputs> | 127 <outputs> |
27 <data name="out_file1" format_source="input" metadata_source="input"/> | 128 <data name="out_file1" format_source="inputs" metadata_source="inputs" label="Concatenated datasets"> |
129 <filter>global_condition['input_type'] == 'singles' or (global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all')</filter> | |
130 </data> | |
131 <collection name="paired_output" type="paired" label="Concatenation by strtand"> | |
132 <data name="forward" /> | |
133 <data name="reverse" /> | |
134 <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand'</filter> | |
135 </collection> | |
136 <collection name="list_output" type="list" label="Concatenation by pairs"> | |
137 <discover_datasets pattern="(?P<name>.*)\.(?P<ext>[^\._]+\.?[^\._])\.listed" visible="false" directory="concatenated"/> | |
138 <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair'</filter> | |
139 </collection> | |
28 </outputs> | 140 </outputs> |
29 <tests> | 141 <tests> |
30 <test> | 142 <!-- Single files concatenation --> |
31 <param name="headers" value="No" /> | 143 <test> <!-- Test 2 single files concatenation with no other option --> |
32 <param name="input" value="1.bed,2.bed"/> | 144 <param name="input_type" value="singles" /> |
145 <param name="inputs" value="1.bed,2.bed"/> | |
146 <param name="dataset_names" value="No" /> | |
147 <param name="headers" value="0" /> | |
33 <output name="out_file1" file="cat_wrapper_out1.bed"/> | 148 <output name="out_file1" file="cat_wrapper_out1.bed"/> |
34 </test> | 149 </test> |
35 <test> | 150 <test> <!-- Test 2 single files concatenation with dataset names activated --> |
36 <param name="headers" value="Yes" /> | 151 <param name="input_type" value="singles" /> |
37 <param name="input" value="1.bed,2.bed"/> | 152 <param name="inputs" value="1.bed,2.bed"/> |
153 <param name="dataset_names" value="Yes" /> | |
154 <param name="headers" value="0" /> | |
38 <output name="out_file1" file="cat_wrapper_out2.bed"/> | 155 <output name="out_file1" file="cat_wrapper_out2.bed"/> |
39 </test> | 156 </test> |
40 | 157 <test> <!-- Test 2 single files concatenation skipping 1 line --> |
158 <param name="input_type" value="singles" /> | |
159 <param name="inputs" value="1.bed,2.bed"/> | |
160 <param name="dataset_names" value="No" /> | |
161 <param name="headers" value="1" /> | |
162 <output name="out_file1" file="cat_wrapper_out3.bed"/> | |
163 </test> | |
164 <test> <!-- Test gz handling with no options --> | |
165 <param name="input_type" value="singles" /> | |
166 <param name="inputs" value="1_f.fastq.gz,1_r.fastq.gz"/> | |
167 <param name="dataset_names" value="No" /> | |
168 <param name="headers" value="0" /> | |
169 <output name="out_file1" file="1.fastq.gz" decompress="True"/> | |
170 </test> | |
171 <test> <!-- Test gz handling with options --> | |
172 <param name="input_type" value="singles" /> | |
173 <param name="inputs" value="1_f.fastq.gz,1_r.fastq.gz"/> | |
174 <param name="dataset_names" value="Yes" /> | |
175 <param name="headers" value="4" /> | |
176 <output name="out_file1" file="1_options.fastq.gz" decompress="True"/> | |
177 </test> | |
178 <!-- Test paired options --> | |
179 <test> <!-- Test paired collection concatenation by_pair with no other option --> | |
180 <param name="input_type" value="paired_collection" /> | |
181 <param name="paired_cat_type" value="by_pair"/> | |
182 <param name="inputs"> | |
183 <collection type="list:paired"> | |
184 <element name="2"> | |
185 <collection type="paired"> | |
186 <element name="forward" value="2_f.fastq"/> | |
187 <element name="reverse" value="2_r.fastq"/> | |
188 </collection> | |
189 </element> | |
190 <element name="3"> | |
191 <collection type="paired"> | |
192 <element name="forward" value="3_f.fastq"/> | |
193 <element name="reverse" value="3_r.fastq"/> | |
194 </collection> | |
195 </element> | |
196 <element name="4"> | |
197 <collection type="paired"> | |
198 <element name="forward" value="4_f.fastq"/> | |
199 <element name="reverse" value="4_r.fastq"/> | |
200 </collection> | |
201 </element> | |
202 </collection> | |
203 </param> | |
204 <param name="dataset_names" value="No" /> | |
205 <param name="headers" value="0" /> | |
206 <output_collection name="list_output" type="list" > | |
207 <element name="2" file="2.fastq"/> | |
208 <element name="3" file="3.fastq"/> | |
209 <element name="4" file="4.fastq"/> | |
210 </output_collection> | |
211 </test> | |
212 <test> <!-- Test paired collection concatenation by_strand with no other option --> | |
213 <param name="input_type" value="paired_collection" /> | |
214 <param name="paired_cat_type" value="by_strand"/> | |
215 <param name="inputs"> | |
216 <collection type="list:paired"> | |
217 <element name="2"> | |
218 <collection type="paired"> | |
219 <element name="forward" value="2_f.fastq"/> | |
220 <element name="reverse" value="2_r.fastq"/> | |
221 </collection> | |
222 </element> | |
223 <element name="3"> | |
224 <collection type="paired"> | |
225 <element name="forward" value="3_f.fastq"/> | |
226 <element name="reverse" value="3_r.fastq"/> | |
227 </collection> | |
228 </element> | |
229 <element name="4"> | |
230 <collection type="paired"> | |
231 <element name="forward" value="4_f.fastq"/> | |
232 <element name="reverse" value="4_r.fastq"/> | |
233 </collection> | |
234 </element> | |
235 </collection> | |
236 </param> | |
237 <param name="dataset_names" value="No" /> | |
238 <param name="headers" value="0" /> | |
239 <output_collection name="paired_output" type="paired" > | |
240 <element name="forward" file="f.fastq"/> | |
241 <element name="reverse" file="r.fastq"/> | |
242 </output_collection> | |
243 </test> | |
41 </tests> | 244 </tests> |
42 <help> | 245 <help> |
43 | 246 |
44 .. class:: warningmark | 247 .. class:: warningmark |
45 | 248 |
46 **WARNING:** This tool does not check if the datasets being concatenated are in the same format. | 249 **WARNING:** This tool does not check if the datasets being concatenated are in the same format. |
47 | 250 |
251 **WARNING:** The paired collection operations do not handle gzipped files. | |
252 | |
48 ----- | 253 ----- |
49 | 254 |
50 **What it does** | 255 **What it does** |
51 | 256 |
52 Concatenates datasets | 257 Concatenates datasets and paired collections with multiple options: |
53 | 258 |
54 ----- | 259 - It's possible to select either a concatenation by strand, by pair or a whole collection concatenation, when the input is a paired collection. |
55 | 260 |
56 **Example** | 261 - Skipping lines before concatenation to avoid headers |
262 | |
263 - Add the name of the concatenated files as separator | |
264 | |
265 ----- | |
266 | |
267 **Single datasets concatenation example** | |
57 | 268 |
58 Concatenating Dataset:: | 269 Concatenating Dataset:: |
59 | 270 |
60 chrX 151087187 151087355 A 0 - | 271 chrX 151087187 151087355 A 0 - |
61 chrX 151572400 151572481 B 0 + | 272 chrX 151572400 151572481 B 0 + |
81 chr2 100000030 200000955 P 0 + | 292 chr2 100000030 200000955 P 0 + |
82 chr2 100000015 200000999 Q 0 + | 293 chr2 100000015 200000999 Q 0 + |
83 | 294 |
84 ----- | 295 ----- |
85 | 296 |
297 **Paired collection concatenation example** | |
298 | |
299 1st pair:: | |
300 | |
301 forward - reverse | |
302 | |
303 2nd pair:: | |
304 | |
305 forward - reverse | |
306 | |
307 Concatenation by strand:: | |
308 | |
309 concatenates: | |
310 | |
311 1st forward + 2nd forward | |
312 1st reverse + 2nd reverse | |
313 | |
314 outputs: | |
315 | |
316 1 pair | |
317 | |
318 Concatenation by pair:: | |
319 | |
320 concatenates: | |
321 | |
322 1st forward + 1st reverse | |
323 2nd forward + 2nd reverse | |
324 | |
325 outputs: | |
326 | |
327 2 datasets | |
328 | |
329 Concatenate all:: | |
330 | |
331 concatenates: | |
332 | |
333 1st forward + 1st reverse + 2nd forward + 2nd reverse | |
334 | |
335 outputs: | |
336 | |
337 1 dataset | |
338 | |
339 ----- | |
340 | |
341 **When selecting "Include dataset names" when concatenating files**: | |
342 | |
343 1st file name="first_tabular":: | |
344 | |
345 chrX 151087187 151087355 A 0 - | |
346 chrX 151572400 151572481 B 0 + | |
347 | |
348 2nd file name="second_tabular":: | |
349 | |
350 chr1 151242630 151242955 X 0 + | |
351 chr1 151271715 151271999 Y 0 + | |
352 chr1 151278832 151279227 Z 0 - | |
353 | |
354 output:: | |
355 | |
356 # first_tabular | |
357 chrX 151087187 151087355 A 0 - | |
358 chrX 151572400 151572481 B 0 + | |
359 # second_tabular | |
360 chr1 151242630 151242955 X 0 + | |
361 chr1 151271715 151271999 Y 0 + | |
362 chr1 151278832 151279227 Z 0 - | |
363 | |
364 ----- | |
365 | |
366 **Skipping lines** | |
367 | |
368 1st file:: | |
369 | |
370 chrX 151087187 151087355 A 0 - | |
371 chrX 151572400 151572481 B 0 + | |
372 | |
373 2nd file:: | |
374 | |
375 chr1 151242630 151242955 X 0 + | |
376 chr1 151271715 151271999 Y 0 + | |
377 chr1 151278832 151279227 Z 0 - | |
378 | |
379 skipping 1 line | |
380 | |
381 output:: | |
382 | |
383 chrX 151572400 151572481 B 0 + | |
384 chr1 151271715 151271999 Y 0 + | |
385 chr1 151278832 151279227 Z 0 - | |
386 | |
387 ----- | |
388 | |
86 Adapted from galaxy's catWrapper.xml to allow multiple input files. | 389 Adapted from galaxy's catWrapper.xml to allow multiple input files. |
87 | 390 |
88 </help> | 391 </help> |
89 </tool> | 392 </tool> |