Mercurial > repos > bgruening > split_file_to_collection
comparison split_file_to_collection.xml @ 3:2ddc36385d7a draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 8d069684e155d2f5b6fae06d14d98ce41321da53"
author | bgruening |
---|---|
date | Tue, 10 Sep 2019 12:31:15 -0400 |
parents | d150ac3d853d |
children | 0850f2dfba13 |
comparison
equal
deleted
inserted
replaced
2:d150ac3d853d | 3:2ddc36385d7a |
---|---|
48 --by '$split_parms.split_by.select_split_by' | 48 --by '$split_parms.split_by.select_split_by' |
49 #if $split_parms.split_by.select_split_by == "col": | 49 #if $split_parms.split_by.select_split_by == "col": |
50 --id_column '$split_parms.split_by.id_col' | 50 --id_column '$split_parms.split_by.id_col' |
51 --match '$split_parms.split_by.match_regex' | 51 --match '$split_parms.split_by.match_regex' |
52 --sub '$split_parms.split_by.sub_regex' | 52 --sub '$split_parms.split_by.sub_regex' |
53 #else | 53 #else |
54 --numnew '$split_parms.split_by.numnew' | 54 --numnew '$split_parms.split_by.numnew' |
55 #if $split_parms.split_by.select_allocate.allocate == "random": | 55 #if $split_parms.split_by.select_allocate.allocate == "random": |
56 --rand | 56 --rand |
57 --seed '$split_parms.split_by.rand.seed' | 57 --seed '$split_parms.split_by.rand.seed' |
58 #end if | 58 #end if |
59 #if $split_parms.split_by.select_allocate.allocate == "batch": | 59 #if $split_parms.split_by.select_allocate.allocate == "batch": |
188 <param name="input" value="test.tabular" ftype="tabular"/> | 188 <param name="input" value="test.tabular" ftype="tabular"/> |
189 <param name="select_ftype" value="tabular"/> | 189 <param name="select_ftype" value="tabular"/> |
190 <param name="select_split_by" value="row"/> | 190 <param name="select_split_by" value="row"/> |
191 <param name="top" value="2"/> | 191 <param name="top" value="2"/> |
192 <param name="numnew" value="2"/> | 192 <param name="numnew" value="2"/> |
193 <param name="newfilenames" value="test"/> | 193 <param name="newfilenames" value="test"/> |
194 <output_collection name="list_output_tab" type="list"> | 194 <output_collection name="list_output_tab" type="list"> |
195 <element name="test_0.tabular" file="test_0.tabular" ftype="tabular"/> | 195 <element name="test_000000.tabular" file="test_0.tabular" ftype="tabular"/> |
196 <element name="test_1.tabular" file="test_1.tabular" ftype="tabular"/> | 196 <element name="test_000001.tabular" file="test_1.tabular" ftype="tabular"/> |
197 </output_collection> | 197 </output_collection> |
198 </test> | 198 </test> |
199 <test> | 199 <test> |
200 <param name="input" value="test.tabular" ftype="tabular"/> | 200 <param name="input" value="test.tabular" ftype="tabular"/> |
201 <param name="select_ftype" value="tabular"/> | 201 <param name="select_ftype" value="tabular"/> |
202 <param name="select_split_by" value="row"/> | 202 <param name="select_split_by" value="row"/> |
203 <param name="top" value="2"/> | 203 <param name="top" value="2"/> |
204 <param name="numnew" value="2"/> | 204 <param name="numnew" value="2"/> |
205 <param name="newfilenames" value="batch_tab"/> | 205 <param name="newfilenames" value="batch_tab"/> |
206 <param name="allocate" value="batch"/> | 206 <param name="allocate" value="batch"/> |
207 <output_collection name="list_output_tab" type="list"> | 207 <output_collection name="list_output_tab" type="list"> |
208 <element name="batch_tab_0.tabular" file="batch_tab_0.tabular" ftype="tabular"/> | 208 <element name="batch_tab_000000.tabular" file="batch_tab_0.tabular" ftype="tabular"/> |
209 <element name="batch_tab_1.tabular" file="batch_tab_1.tabular" ftype="tabular"/> | 209 <element name="batch_tab_000001.tabular" file="batch_tab_1.tabular" ftype="tabular"/> |
210 </output_collection> | |
211 </test> | |
212 <test> | |
213 <param name="select_ftype" value="txt"/> | |
214 <param name="input" value="karyotype.txt" ftype="txt"/> | |
215 <param name="numnew" value="24"/> | |
216 <param name="newfilenames" value="chr"/> | |
217 <param name="allocate" value="batch"/> | |
218 | |
219 <output_collection name="list_output_txt" type="list"> | |
220 <element name="chr_000000.txt" file="chr_000000.txt" ftype="txt"/> | |
221 <element name="chr_000001.txt" file="chr_000001.txt" ftype="txt"/> | |
222 <element name="chr_000002.txt" file="chr_000002.txt" ftype="txt"/> | |
223 <element name="chr_000003.txt" file="chr_000003.txt" ftype="txt"/> | |
224 <element name="chr_000004.txt" file="chr_000004.txt" ftype="txt"/> | |
225 <element name="chr_000005.txt" file="chr_000005.txt" ftype="txt"/> | |
226 <element name="chr_000006.txt" file="chr_000006.txt" ftype="txt"/> | |
227 <element name="chr_000007.txt" file="chr_000007.txt" ftype="txt"/> | |
228 <element name="chr_000008.txt" file="chr_000008.txt" ftype="txt"/> | |
229 <element name="chr_000009.txt" file="chr_000009.txt" ftype="txt"/> | |
230 <element name="chr_000010.txt" file="chr_000010.txt" ftype="txt"/> | |
231 <element name="chr_000011.txt" file="chr_000011.txt" ftype="txt"/> | |
232 <element name="chr_000012.txt" file="chr_000012.txt" ftype="txt"/> | |
233 <element name="chr_000013.txt" file="chr_000013.txt" ftype="txt"/> | |
234 <element name="chr_000014.txt" file="chr_000014.txt" ftype="txt"/> | |
235 <element name="chr_000015.txt" file="chr_000015.txt" ftype="txt"/> | |
236 <element name="chr_000016.txt" file="chr_000016.txt" ftype="txt"/> | |
237 <element name="chr_000017.txt" file="chr_000017.txt" ftype="txt"/> | |
238 <element name="chr_000018.txt" file="chr_000018.txt" ftype="txt"/> | |
239 <element name="chr_000019.txt" file="chr_000019.txt" ftype="txt"/> | |
240 <element name="chr_000020.txt" file="chr_000020.txt" ftype="txt"/> | |
241 <element name="chr_000021.txt" file="chr_000021.txt" ftype="txt"/> | |
242 <element name="chr_000022.txt" file="chr_000022.txt" ftype="txt"/> | |
243 <element name="chr_000023.txt" file="chr_000023.txt" ftype="txt"/> | |
210 </output_collection> | 244 </output_collection> |
211 </test> | 245 </test> |
212 <test> | 246 <test> |
213 <param name="input" value="psm.tabular" ftype="tabular"/> | 247 <param name="input" value="psm.tabular" ftype="tabular"/> |
214 <param name="select_ftype" value="tabular"/> | 248 <param name="select_ftype" value="tabular"/> |
228 <param name="input" value="demo758Dacentroid.mgf" ftype="mgf"/> | 262 <param name="input" value="demo758Dacentroid.mgf" ftype="mgf"/> |
229 <param name="select_ftype" value="mgf"/> | 263 <param name="select_ftype" value="mgf"/> |
230 <param name="numnew" value="3"/> | 264 <param name="numnew" value="3"/> |
231 <param name="newfilenames" value="demo"/> | 265 <param name="newfilenames" value="demo"/> |
232 <output_collection name="list_output_mgf" type="list"> | 266 <output_collection name="list_output_mgf" type="list"> |
233 <element name="demo_0.mgf" file="demo_0.mgf" ftype="mgf"/> | 267 <element name="demo_000000.mgf" file="demo_0.mgf" ftype="mgf"/> |
234 <element name="demo_1.mgf" file="demo_1.mgf" ftype="mgf"/> | 268 <element name="demo_000001.mgf" file="demo_1.mgf" ftype="mgf"/> |
235 <element name="demo_2.mgf" file="demo_2.mgf" ftype="mgf"/> | 269 <element name="demo_000002.mgf" file="demo_2.mgf" ftype="mgf"/> |
236 </output_collection> | 270 </output_collection> |
237 </test> | 271 </test> |
238 <test> | 272 <test> |
239 <param name="input" value="test.fasta" ftype="fasta"/> | 273 <param name="input" value="test.fasta" ftype="fasta"/> |
240 <param name="select_ftype" value="fasta"/> | 274 <param name="select_ftype" value="fasta"/> |
241 <param name="numnew" value="2"/> | 275 <param name="numnew" value="2"/> |
242 <param name="newfilenames" value="test"/> | 276 <param name="newfilenames" value="test"/> |
243 <output_collection name="list_output_fasta" type="list"> | 277 <output_collection name="list_output_fasta" type="list"> |
244 <element name="test_0.fasta" file="test_0.fasta" ftype="fasta"/> | 278 <element name="test_000000.fasta" file="test_0.fasta" ftype="fasta"/> |
245 <element name="test_1.fasta" file="test_1.fasta" ftype="fasta"/> | 279 <element name="test_000001.fasta" file="test_1.fasta" ftype="fasta"/> |
246 </output_collection> | 280 </output_collection> |
247 </test> | 281 </test> |
248 <test> | 282 <test> |
249 <param name="input" value="test.fastq" ftype="fastq"/> | 283 <param name="input" value="test.fastq" ftype="fastq"/> |
250 <param name="select_ftype" value="fastq"/> | 284 <param name="select_ftype" value="fastq"/> |
251 <param name="numnew" value="2"/> | 285 <param name="numnew" value="2"/> |
252 <param name="newfilenames" value="test"/> | 286 <param name="newfilenames" value="test"/> |
253 <output_collection name="list_output_fastq" type="list"> | 287 <output_collection name="list_output_fastq" type="list"> |
254 <element name="test_0.fastq" file="test_0.fastq" ftype="fastq"/> | 288 <element name="test_000000.fastq" file="test_0.fastq" ftype="fastq"/> |
255 <element name="test_1.fastq" file="test_1.fastq" ftype="fastq"/> | 289 <element name="test_000001.fastq" file="test_1.fastq" ftype="fastq"/> |
256 </output_collection> | 290 </output_collection> |
257 </test> | 291 </test> |
258 <test> | 292 <test> |
259 <param name="input" value="test.fasta" ftype="fasta"/> | 293 <param name="input" value="test.fasta" ftype="fasta"/> |
260 <param name="select_ftype" value="fasta"/> | 294 <param name="select_ftype" value="fasta"/> |
261 <param name="numnew" value="2"/> | 295 <param name="numnew" value="2"/> |
262 <param name="newfilenames" value="rand"/> | 296 <param name="newfilenames" value="rand"/> |
263 <param name="allocate" value="random"/> | 297 <param name="allocate" value="random"/> |
264 <param name="seed" value="1010"/> | 298 <param name="seed" value="1010"/> |
265 <output_collection name="list_output_fasta" type="list"> | 299 <output_collection name="list_output_fasta" type="list"> |
266 <element name="rand_0.fasta" file="rand_0.fasta" ftype="fasta"/> | 300 <element name="rand_000000.fasta" file="rand_0.fasta" ftype="fasta"/> |
267 <element name="rand_1.fasta" file="rand_1.fasta" ftype="fasta"/> | 301 <element name="rand_000001.fasta" file="rand_1.fasta" ftype="fasta"/> |
268 </output_collection> | 302 </output_collection> |
269 </test> | 303 </test> |
270 <test> | 304 <test> |
271 <param name="input" value="test.fasta" ftype="fasta"/> | 305 <param name="input" value="test.fasta" ftype="fasta"/> |
272 <param name="select_ftype" value="fasta"/> | 306 <param name="select_ftype" value="fasta"/> |
273 <param name="numnew" value="2"/> | 307 <param name="numnew" value="2"/> |
274 <param name="newfilenames" value="fasta_batch"/> | 308 <param name="newfilenames" value="fasta_batch"/> |
275 <param name="allocate" value="batch"/> | 309 <param name="allocate" value="batch"/> |
276 <output_collection name="list_output_fasta" type="list"> | 310 <output_collection name="list_output_fasta" type="list"> |
277 <element name="fasta_batch_0.fasta" file="fasta_batch_0.fasta" ftype="fasta"/> | 311 <element name="fasta_batch_000000.fasta" file="fasta_batch_0.fasta" ftype="fasta"/> |
278 <element name="fasta_batch_1.fasta" file="fasta_batch_1.fasta" ftype="fasta"/> | 312 <element name="fasta_batch_000001.fasta" file="fasta_batch_1.fasta" ftype="fasta"/> |
279 </output_collection> | 313 </output_collection> |
280 </test> | 314 </test> |
281 <test> | 315 <test> |
282 <param name="input" value="test.tabular" ftype="txt"/> | 316 <param name="input" value="test.tabular" ftype="txt"/> |
283 <param name="select_ftype" value="txt"/> | 317 <param name="select_ftype" value="txt"/> |
284 <param name="numnew" value="2"/> | 318 <param name="numnew" value="2"/> |
285 <param name="newfilenames" value="test"/> | 319 <param name="newfilenames" value="test"/> |
286 <output_collection name="list_output_txt" type="list"> | 320 <output_collection name="list_output_txt" type="list"> |
287 <element name="test_0.txt" file="test_0.tabular" ftype="txt" lines_diff="1"/> | 321 <element name="test_000000.txt" file="test_0.tabular" ftype="txt" lines_diff="1"/> |
288 <element name="test_1.txt" file="test_1.tabular" ftype="txt" lines_diff="1"/> | 322 <element name="test_000001.txt" file="test_1.tabular" ftype="txt" lines_diff="1"/> |
289 </output_collection> | 323 </output_collection> |
290 </test> | 324 </test> |
291 <test> | 325 <test> |
292 <param name="input" value="test.tabular" ftype="txt"/> | 326 <param name="input" value="test.tabular" ftype="txt"/> |
293 <param name="select_ftype" value="generic"/> | 327 <param name="select_ftype" value="generic"/> |
294 <param name="generic_regex" value="^.*"/> | 328 <param name="generic_regex" value="^.*"/> |
295 <param name="numnew" value="2"/> | 329 <param name="numnew" value="2"/> |
296 <param name="newfilenames" value="test"/> | 330 <param name="newfilenames" value="test"/> |
297 <output_collection name="list_output_generic" type="list"> | 331 <output_collection name="list_output_generic" type="list"> |
298 <element name="test_0" file="test_0.tabular" ftype="txt" lines_diff="1"/> | 332 <element name="test_000000" file="test_0.tabular" ftype="txt" lines_diff="1"/> |
299 <element name="test_1" file="test_1.tabular" ftype="txt" lines_diff="1"/> | 333 <element name="test_000001" file="test_1.tabular" ftype="txt" lines_diff="1"/> |
300 </output_collection> | 334 </output_collection> |
301 </test> | 335 </test> |
302 <test> | 336 <test> |
303 <param name="input" value="test.fasta" ftype="fasta"/> | 337 <param name="input" value="test.fasta" ftype="fasta"/> |
304 <param name="select_ftype" value="generic"/> | 338 <param name="select_ftype" value="generic"/> |
305 <param name="generic_regex" value="^>.*"/> | 339 <param name="generic_regex" value="^>.*"/> |
306 <param name="numnew" value="2"/> | 340 <param name="numnew" value="2"/> |
307 <param name="newfilenames" value="rand"/> | 341 <param name="newfilenames" value="rand"/> |
308 <param name="allocate" value="random"/> | 342 <param name="allocate" value="random"/> |
309 <param name="seed" value="1010"/> | 343 <param name="seed" value="1010"/> |
310 <output_collection name="list_output_generic" type="list"> | 344 <output_collection name="list_output_generic" type="list"> |
311 <element name="rand_0" file="rand_0.fasta" ftype="fasta"/> | 345 <element name="rand_000000" file="rand_0.fasta" ftype="fasta"/> |
312 <element name="rand_1" file="rand_1.fasta" ftype="fasta"/> | 346 <element name="rand_000001" file="rand_1.fasta" ftype="fasta"/> |
313 </output_collection> | 347 </output_collection> |
314 </test> | 348 </test> |
315 <test> | 349 <test> |
316 <param name="input" value="3_molecules.sdf" ftype="sdf"/> | 350 <param name="input" value="3_molecules.sdf" ftype="sdf"/> |
317 <param name="select_ftype" value="generic"/> | 351 <param name="select_ftype" value="generic"/> |
318 <param name="generic_regex" value="^\$\$\$\$.*"/> | 352 <param name="generic_regex" value="^\$\$\$\$.*"/> |
319 <param name="numnew" value="1000"/> | 353 <param name="numnew" value="1000"/> |
320 <param name="newfilenames" value="mol"/> | 354 <param name="newfilenames" value="mol"/> |
321 <param name="allocate" value="batch"/> | 355 <param name="allocate" value="batch"/> |
322 <output_collection name="list_output_generic" type="list"> | 356 <output_collection name="list_output_generic" type="list"> |
323 <element name="mol_0" file="mol_0.sdf" ftype="sdf"/> | 357 <element name="mol_000000" file="mol_0.sdf" ftype="sdf"/> |
324 <element name="mol_1" file="mol_1.sdf" ftype="sdf"/> | 358 <element name="mol_000001" file="mol_1.sdf" ftype="sdf"/> |
325 <element name="mol_2" file="mol_2.sdf" ftype="sdf"/> | 359 <element name="mol_000002" file="mol_2.sdf" ftype="sdf"/> |
326 </output_collection> | 360 </output_collection> |
327 </test> | 361 </test> |
328 </tests> | 362 </tests> |
329 <help><![CDATA[ | 363 <help><![CDATA[ |
330 **Split file into a dataset collection** | 364 **Split file into a dataset collection** |
331 | 365 |
332 This tool splits a data set consisting of records into multiple data sets within a collection. | 366 This tool splits a data set consisting of records into multiple data sets within a collection. |
333 A record can be for instance simply a line, a FASTA sequence (header + sequence), a FASTQ sequence | 367 A record can be for instance simply a line, a FASTA sequence (header + sequence), a FASTQ sequence |
334 (headers + sequence + qualities), etc. The important property is that the beginning of a new record | 368 (headers + sequence + qualities), etc. The important property is that the beginning of a new record |
335 can be specified by a regular expression, e.g. ".*" for lines, ">.*" for FASTA, or "@.*" for FASTQ. | 369 can be specified by a regular expression, e.g. ".*" for lines, ">.*" for FASTA, or "@.*" for FASTQ. |
336 The tool has presets for text, tabular data sets (which are split by line), FASTA, FASTQ, and MGF. | 370 The tool has presets for text, tabular data sets (which are split by line), FASTA, FASTQ, and MGF. |
337 For other data types the text delimiting records can be specified manually using the generic splitter. | 371 For other data types the text delimiting records can be specified manually using the generic splitter. |
338 | 372 |
339 If splitting by line (or by some other item, like a FASTA entry or an MGF record), the splitting can be done either alternating, in original record order, or at random. | 373 If splitting by line (or by some other item, like a FASTA entry or an MGF record), the splitting can be done either alternating, in original record order, or at random. |
340 | 374 |
341 If t records are to be distributed to n new data sets, then the i-th record goes to data set | 375 If t records are to be distributed to n new data sets, then the i-th record goes to data set |
342 | 376 |
343 * floor(i / t * n) (for batch), | 377 * floor(i / t * n) (for batch), |
344 * i % n (for alternating), or | 378 * i % n (for alternating), or |
345 * a random data set | 379 * a random data set |
346 | 380 |
347 For instance, t=5 records are distributed as follows on n=2 data sets | 381 For instance, t=5 records are distributed as follows on n=2 data sets |
348 | 382 |
366 2 1 2 2 | 400 2 1 2 2 |
367 3 1 0 0 | 401 3 1 0 0 |
368 4 2 1 1 | 402 4 2 1 1 |
369 = === === ==== | 403 = === === ==== |
370 | 404 |
371 Note that there are no guarantees when splitting at random that every result file will be non-empty, so downstream tools should be able to gracefully handle empty files. | 405 Note that there are no guarantees when splitting at random that every result file will be non-empty, so downstream tools should be able to gracefully handle empty files. |
372 | 406 |
373 If a tabular file is used as input, you may choose to split by line or by column. If split by column, a new file is created for each unique value in the column. | 407 If a tabular file is used as input, you may choose to split by line or by column. If split by column, a new file is created for each unique value in the column. |
374 In addition, (Python) regular expressions may be used to transform the value in the column to a new value. Caution should be used with this feature, as it could transform all values to the same value, or other unexpected behavior. | 408 In addition, (Python) regular expressions may be used to transform the value in the column to a new value. Caution should be used with this feature, as it could transform all values to the same value, or other unexpected behavior. |
375 The default regular expression uses each value in the column without modifying it. | 409 The default regular expression uses each value in the column without modifying it. |
376 ]]></help> | 410 ]]></help> |
377 <citations> | 411 <citations> |
378 <citation type="bibtex"> | 412 <citation type="bibtex"> |
379 @misc{githubsplit, | 413 @misc{githubsplit, |
380 author = {Easterly, Caleb}, | 414 author = {Easterly, Caleb}, |