split_file_to_collection: split_file_to

comparison split_file_to_collection.xml @ 3:2ddc36385d7a draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 8d069684e155d2f5b6fae06d14d98ce41321da53"

author	bgruening
date	Tue, 10 Sep 2019 12:31:15 -0400
parents	d150ac3d853d
children	0850f2dfba13

comparison

equal deleted inserted replaced

-:d150ac3d853d
+:2ddc36385d7a
 --by '$split_parms.split_by.select_split_by'
 #if $split_parms.split_by.select_split_by == "col":
 --id_column '$split_parms.split_by.id_col'
 --match '$split_parms.split_by.match_regex'
 --sub '$split_parms.split_by.sub_regex'
 #else
 --numnew '$split_parms.split_by.numnew'
 #if $split_parms.split_by.select_allocate.allocate == "random":
 --rand
 --seed '$split_parms.split_by.rand.seed'
 #end if
 #if $split_parms.split_by.select_allocate.allocate == "batch":
 <param name="input" value="test.tabular" ftype="tabular"/>
 <param name="select_ftype" value="tabular"/>
 <param name="select_split_by" value="row"/>
 <param name="top" value="2"/>
 <param name="numnew" value="2"/>
 <param name="newfilenames" value="test"/>
 <output_collection name="list_output_tab" type="list">
-<element name="test_0.tabular" file="test_0.tabular" ftype="tabular"/>
+<element name="test_000000.tabular" file="test_0.tabular" ftype="tabular"/>
-<element name="test_1.tabular" file="test_1.tabular" ftype="tabular"/>
+<element name="test_000001.tabular" file="test_1.tabular" ftype="tabular"/>
 </output_collection>
 </test>
 <test>
 <param name="input" value="test.tabular" ftype="tabular"/>
 <param name="select_ftype" value="tabular"/>
 <param name="select_split_by" value="row"/>
 <param name="top" value="2"/>
 <param name="numnew" value="2"/>
 <param name="newfilenames" value="batch_tab"/>
 <param name="allocate" value="batch"/>
 <output_collection name="list_output_tab" type="list">
-<element name="batch_tab_0.tabular" file="batch_tab_0.tabular" ftype="tabular"/>
+<element name="batch_tab_000000.tabular" file="batch_tab_0.tabular" ftype="tabular"/>
-<element name="batch_tab_1.tabular" file="batch_tab_1.tabular" ftype="tabular"/>
+<element name="batch_tab_000001.tabular" file="batch_tab_1.tabular" ftype="tabular"/>
+</output_collection>
+</test>
+<test>
+<param name="select_ftype" value="txt"/>
+<param name="input" value="karyotype.txt" ftype="txt"/>
+<param name="numnew" value="24"/>
+<param name="newfilenames" value="chr"/>
+<param name="allocate" value="batch"/>
+<output_collection name="list_output_txt" type="list">
+<element name="chr_000000.txt" file="chr_000000.txt" ftype="txt"/>
+<element name="chr_000001.txt" file="chr_000001.txt" ftype="txt"/>
+<element name="chr_000002.txt" file="chr_000002.txt" ftype="txt"/>
+<element name="chr_000003.txt" file="chr_000003.txt" ftype="txt"/>
+<element name="chr_000004.txt" file="chr_000004.txt" ftype="txt"/>
+<element name="chr_000005.txt" file="chr_000005.txt" ftype="txt"/>
+<element name="chr_000006.txt" file="chr_000006.txt" ftype="txt"/>
+<element name="chr_000007.txt" file="chr_000007.txt" ftype="txt"/>
+<element name="chr_000008.txt" file="chr_000008.txt" ftype="txt"/>
+<element name="chr_000009.txt" file="chr_000009.txt" ftype="txt"/>
+<element name="chr_000010.txt" file="chr_000010.txt" ftype="txt"/>
+<element name="chr_000011.txt" file="chr_000011.txt" ftype="txt"/>
+<element name="chr_000012.txt" file="chr_000012.txt" ftype="txt"/>
+<element name="chr_000013.txt" file="chr_000013.txt" ftype="txt"/>
+<element name="chr_000014.txt" file="chr_000014.txt" ftype="txt"/>
+<element name="chr_000015.txt" file="chr_000015.txt" ftype="txt"/>
+<element name="chr_000016.txt" file="chr_000016.txt" ftype="txt"/>
+<element name="chr_000017.txt" file="chr_000017.txt" ftype="txt"/>
+<element name="chr_000018.txt" file="chr_000018.txt" ftype="txt"/>
+<element name="chr_000019.txt" file="chr_000019.txt" ftype="txt"/>
+<element name="chr_000020.txt" file="chr_000020.txt" ftype="txt"/>
+<element name="chr_000021.txt" file="chr_000021.txt" ftype="txt"/>
+<element name="chr_000022.txt" file="chr_000022.txt" ftype="txt"/>
+<element name="chr_000023.txt" file="chr_000023.txt" ftype="txt"/>
 </output_collection>
 </test>
 <test>
 <param name="input" value="psm.tabular" ftype="tabular"/>
 <param name="select_ftype" value="tabular"/>
 <param name="input" value="demo758Dacentroid.mgf" ftype="mgf"/>
 <param name="select_ftype" value="mgf"/>
 <param name="numnew" value="3"/>
 <param name="newfilenames" value="demo"/>
 <output_collection name="list_output_mgf" type="list">
-<element name="demo_0.mgf" file="demo_0.mgf" ftype="mgf"/>
+<element name="demo_000000.mgf" file="demo_0.mgf" ftype="mgf"/>
-<element name="demo_1.mgf" file="demo_1.mgf" ftype="mgf"/>
+<element name="demo_000001.mgf" file="demo_1.mgf" ftype="mgf"/>
-<element name="demo_2.mgf" file="demo_2.mgf" ftype="mgf"/>
+<element name="demo_000002.mgf" file="demo_2.mgf" ftype="mgf"/>
 </output_collection>
 </test>
 <test>
 <param name="input" value="test.fasta" ftype="fasta"/>
 <param name="select_ftype" value="fasta"/>
 <param name="numnew" value="2"/>
 <param name="newfilenames" value="test"/>
 <output_collection name="list_output_fasta" type="list">
-<element name="test_0.fasta" file="test_0.fasta" ftype="fasta"/>
+<element name="test_000000.fasta" file="test_0.fasta" ftype="fasta"/>
-<element name="test_1.fasta" file="test_1.fasta" ftype="fasta"/>
+<element name="test_000001.fasta" file="test_1.fasta" ftype="fasta"/>
 </output_collection>
 </test>
 <test>
 <param name="input" value="test.fastq" ftype="fastq"/>
 <param name="select_ftype" value="fastq"/>
 <param name="numnew" value="2"/>
 <param name="newfilenames" value="test"/>
 <output_collection name="list_output_fastq" type="list">
-<element name="test_0.fastq" file="test_0.fastq" ftype="fastq"/>
+<element name="test_000000.fastq" file="test_0.fastq" ftype="fastq"/>
-<element name="test_1.fastq" file="test_1.fastq" ftype="fastq"/>
+<element name="test_000001.fastq" file="test_1.fastq" ftype="fastq"/>
 </output_collection>
 </test>
 <test>
 <param name="input" value="test.fasta" ftype="fasta"/>
 <param name="select_ftype" value="fasta"/>
 <param name="numnew" value="2"/>
 <param name="newfilenames" value="rand"/>
 <param name="allocate" value="random"/>
 <param name="seed" value="1010"/>
 <output_collection name="list_output_fasta" type="list">
-<element name="rand_0.fasta" file="rand_0.fasta" ftype="fasta"/>
+<element name="rand_000000.fasta" file="rand_0.fasta" ftype="fasta"/>
-<element name="rand_1.fasta" file="rand_1.fasta" ftype="fasta"/>
+<element name="rand_000001.fasta" file="rand_1.fasta" ftype="fasta"/>
 </output_collection>
 </test>
 <test>
 <param name="input" value="test.fasta" ftype="fasta"/>
 <param name="select_ftype" value="fasta"/>
 <param name="numnew" value="2"/>
 <param name="newfilenames" value="fasta_batch"/>
 <param name="allocate" value="batch"/>
 <output_collection name="list_output_fasta" type="list">
-<element name="fasta_batch_0.fasta" file="fasta_batch_0.fasta" ftype="fasta"/>
+<element name="fasta_batch_000000.fasta" file="fasta_batch_0.fasta" ftype="fasta"/>
-<element name="fasta_batch_1.fasta" file="fasta_batch_1.fasta" ftype="fasta"/>
+<element name="fasta_batch_000001.fasta" file="fasta_batch_1.fasta" ftype="fasta"/>
 </output_collection>
 </test>
 <test>
 <param name="input" value="test.tabular" ftype="txt"/>
 <param name="select_ftype" value="txt"/>
 <param name="numnew" value="2"/>
 <param name="newfilenames" value="test"/>
 <output_collection name="list_output_txt" type="list">
-<element name="test_0.txt" file="test_0.tabular" ftype="txt" lines_diff="1"/>
+<element name="test_000000.txt" file="test_0.tabular" ftype="txt" lines_diff="1"/>
-<element name="test_1.txt" file="test_1.tabular" ftype="txt" lines_diff="1"/>
+<element name="test_000001.txt" file="test_1.tabular" ftype="txt" lines_diff="1"/>
 </output_collection>
 </test>
 <test>
 <param name="input" value="test.tabular" ftype="txt"/>
 <param name="select_ftype" value="generic"/>
 <param name="generic_regex" value="^.*"/>
 <param name="numnew" value="2"/>
 <param name="newfilenames" value="test"/>
 <output_collection name="list_output_generic" type="list">
-<element name="test_0" file="test_0.tabular" ftype="txt" lines_diff="1"/>
+<element name="test_000000" file="test_0.tabular" ftype="txt" lines_diff="1"/>
-<element name="test_1" file="test_1.tabular" ftype="txt" lines_diff="1"/>
+<element name="test_000001" file="test_1.tabular" ftype="txt" lines_diff="1"/>
 </output_collection>
 </test>
 <test>
 <param name="input" value="test.fasta" ftype="fasta"/>
 <param name="select_ftype" value="generic"/>
 <param name="generic_regex" value="^>.*"/>
 <param name="numnew" value="2"/>
 <param name="newfilenames" value="rand"/>
 <param name="allocate" value="random"/>
 <param name="seed" value="1010"/>
 <output_collection name="list_output_generic" type="list">
-<element name="rand_0" file="rand_0.fasta" ftype="fasta"/>
+<element name="rand_000000" file="rand_0.fasta" ftype="fasta"/>
-<element name="rand_1" file="rand_1.fasta" ftype="fasta"/>
+<element name="rand_000001" file="rand_1.fasta" ftype="fasta"/>
 </output_collection>
 </test>
 <test>
 <param name="input" value="3_molecules.sdf" ftype="sdf"/>
 <param name="select_ftype" value="generic"/>
 <param name="generic_regex" value="^\$\$\$\$.*"/>
 <param name="numnew" value="1000"/>
 <param name="newfilenames" value="mol"/>
 <param name="allocate" value="batch"/>
 <output_collection name="list_output_generic" type="list">
-<element name="mol_0" file="mol_0.sdf" ftype="sdf"/>
+<element name="mol_000000" file="mol_0.sdf" ftype="sdf"/>
-<element name="mol_1" file="mol_1.sdf" ftype="sdf"/>
+<element name="mol_000001" file="mol_1.sdf" ftype="sdf"/>
-<element name="mol_2" file="mol_2.sdf" ftype="sdf"/>
+<element name="mol_000002" file="mol_2.sdf" ftype="sdf"/>
 </output_collection>
 </test>
 </tests>
 <help><![CDATA[
 **Split file into a dataset collection**
 This tool splits a data set consisting of records into multiple data sets within a collection.
 A record can be for instance simply a line, a FASTA sequence (header + sequence), a FASTQ sequence
 (headers + sequence + qualities), etc. The important property is that the beginning of a new record
 can be specified by a regular expression, e.g. ".*" for lines, ">.*" for FASTA, or "@.*" for FASTQ.
 The tool has presets for text, tabular data sets (which are split by line), FASTA, FASTQ, and MGF.
 For other data types the text delimiting records can be specified manually using the generic splitter.
 If splitting by line (or by some other item, like a FASTA entry or an MGF record), the splitting can be done alternating, in original record order (batch), or at random.
 If t records are to be distributed to n new data sets, then the i-th record goes to data set
 * floor(i / t * n) (for batch),
 * i % n (for alternating), or
 * a random data set
 For instance, t=5 records are distributed as follows on n=2 data sets
 2 1   2   2
 3 1   0   0
 4 2   1   1
 = === === ====
 Note that there are no guarantees when splitting at random that every result file will be non-empty, so downstream tools should be able to gracefully handle empty files.
 If a tabular file is used as input, you may choose to split by line or by column. If split by column, a new file is created for each unique value in the column.
 In addition, (Python) regular expressions may be used to transform the value in the column to a new value. Caution should be used with this feature, as it could transform all values to the same value or cause other unexpected behavior.
 The default regular expression uses each value in the column without modifying it.
 ]]></help>
 <citations>
 <citation type="bibtex">
 @misc{githubsplit,
 author = {Easterly, Caleb},

Mercurial > repos > bgruening > split_file_to_collection

comparison split_file_to_collection.xml @ 3:2ddc36385d7a draft