Mercurial > repos > bgruening > split_file_to_collection

diff split_file_to_collection.xml @ 3:2ddc36385d7a draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 8d069684e155d2f5b6fae06d14d98ce41321da53"
author: bgruening
date: Tue, 10 Sep 2019 12:31:15 -0400
parents: d150ac3d853d
children: 0850f2dfba13
--- a/split_file_to_collection.xml	Wed Aug 28 10:55:25 2019 -0400
+++ b/split_file_to_collection.xml	Tue Sep 10 12:31:15 2019 -0400
@@ -50,8 +50,8 @@
                     --id_column '$split_parms.split_by.id_col'
                     --match '$split_parms.split_by.match_regex'
                     --sub '$split_parms.split_by.sub_regex'
-                #else 
-                    --numnew '$split_parms.split_by.numnew' 
+                #else
+                    --numnew '$split_parms.split_by.numnew'
                     #if $split_parms.split_by.select_allocate.allocate == "random":
                         --rand
                         --seed '$split_parms.split_by.rand.seed'
@@ -190,10 +190,10 @@
             <param name="select_split_by" value="row"/>
             <param name="top" value="2"/>
             <param name="numnew" value="2"/>
-            <param name="newfilenames" value="test"/> 
+            <param name="newfilenames" value="test"/>
             <output_collection name="list_output_tab" type="list">
-                <element name="test_0.tabular" file="test_0.tabular" ftype="tabular"/>
-                <element name="test_1.tabular" file="test_1.tabular" ftype="tabular"/>
+                <element name="test_000000.tabular" file="test_0.tabular" ftype="tabular"/>
+                <element name="test_000001.tabular" file="test_1.tabular" ftype="tabular"/>
             </output_collection>
         </test>
         <test>
@@ -203,10 +203,44 @@
             <param name="top" value="2"/>
             <param name="numnew" value="2"/>
             <param name="newfilenames" value="batch_tab"/>
-            <param name="allocate" value="batch"/> 
+            <param name="allocate" value="batch"/>
             <output_collection name="list_output_tab" type="list">
-                <element name="batch_tab_0.tabular" file="batch_tab_0.tabular" ftype="tabular"/>
-                <element name="batch_tab_1.tabular" file="batch_tab_1.tabular" ftype="tabular"/>
+                <element name="batch_tab_000000.tabular" file="batch_tab_0.tabular" ftype="tabular"/>
+                <element name="batch_tab_000001.tabular" file="batch_tab_1.tabular" ftype="tabular"/>
+            </output_collection>
+        </test>
+        <test>
+            <param name="select_ftype" value="txt"/>
+            <param name="input" value="karyotype.txt" ftype="txt"/>
+            <param name="numnew" value="24"/>
+            <param name="newfilenames" value="chr"/>
+            <param name="allocate" value="batch"/>
+
+            <output_collection name="list_output_txt" type="list">
+                <element name="chr_000000.txt" file="chr_000000.txt" ftype="txt"/>
+                <element name="chr_000001.txt" file="chr_000001.txt" ftype="txt"/>
+                <element name="chr_000002.txt" file="chr_000002.txt" ftype="txt"/>
+                <element name="chr_000003.txt" file="chr_000003.txt" ftype="txt"/>
+                <element name="chr_000004.txt" file="chr_000004.txt" ftype="txt"/>
+                <element name="chr_000005.txt" file="chr_000005.txt" ftype="txt"/>
+                <element name="chr_000006.txt" file="chr_000006.txt" ftype="txt"/>
+                <element name="chr_000007.txt" file="chr_000007.txt" ftype="txt"/>
+                <element name="chr_000008.txt" file="chr_000008.txt" ftype="txt"/>
+                <element name="chr_000009.txt" file="chr_000009.txt" ftype="txt"/>
+                <element name="chr_000010.txt" file="chr_000010.txt" ftype="txt"/>
+                <element name="chr_000011.txt" file="chr_000011.txt" ftype="txt"/>
+                <element name="chr_000012.txt" file="chr_000012.txt" ftype="txt"/>
+                <element name="chr_000013.txt" file="chr_000013.txt" ftype="txt"/>
+                <element name="chr_000014.txt" file="chr_000014.txt" ftype="txt"/>
+                <element name="chr_000015.txt" file="chr_000015.txt" ftype="txt"/>
+                <element name="chr_000016.txt" file="chr_000016.txt" ftype="txt"/>
+                <element name="chr_000017.txt" file="chr_000017.txt" ftype="txt"/>
+                <element name="chr_000018.txt" file="chr_000018.txt" ftype="txt"/>
+                <element name="chr_000019.txt" file="chr_000019.txt" ftype="txt"/>
+                <element name="chr_000020.txt" file="chr_000020.txt" ftype="txt"/>
+                <element name="chr_000021.txt" file="chr_000021.txt" ftype="txt"/>
+                <element name="chr_000022.txt" file="chr_000022.txt" ftype="txt"/>
+                <element name="chr_000023.txt" file="chr_000023.txt" ftype="txt"/>
             </output_collection>
         </test>
         <test>
@@ -230,29 +264,29 @@
             <param name="numnew" value="3"/>
             <param name="newfilenames" value="demo"/>
             <output_collection name="list_output_mgf" type="list">
-                <element name="demo_0.mgf" file="demo_0.mgf" ftype="mgf"/>
-                <element name="demo_1.mgf" file="demo_1.mgf" ftype="mgf"/>
-                <element name="demo_2.mgf" file="demo_2.mgf" ftype="mgf"/>
+                <element name="demo_000000.mgf" file="demo_0.mgf" ftype="mgf"/>
+                <element name="demo_000001.mgf" file="demo_1.mgf" ftype="mgf"/>
+                <element name="demo_000002.mgf" file="demo_2.mgf" ftype="mgf"/>
             </output_collection>
         </test>
         <test>
             <param name="input" value="test.fasta" ftype="fasta"/>
             <param name="select_ftype" value="fasta"/>
             <param name="numnew" value="2"/>
-            <param name="newfilenames" value="test"/> 
+            <param name="newfilenames" value="test"/>
             <output_collection name="list_output_fasta" type="list">
-                <element name="test_0.fasta" file="test_0.fasta" ftype="fasta"/>
-                <element name="test_1.fasta" file="test_1.fasta" ftype="fasta"/>
+                <element name="test_000000.fasta" file="test_0.fasta" ftype="fasta"/>
+                <element name="test_000001.fasta" file="test_1.fasta" ftype="fasta"/>
             </output_collection>
         </test>
         <test>
             <param name="input" value="test.fastq" ftype="fastq"/>
             <param name="select_ftype" value="fastq"/>
             <param name="numnew" value="2"/>
-            <param name="newfilenames" value="test"/> 
+            <param name="newfilenames" value="test"/>
             <output_collection name="list_output_fastq" type="list">
-                <element name="test_0.fastq" file="test_0.fastq" ftype="fastq"/>
-                <element name="test_1.fastq" file="test_1.fastq" ftype="fastq"/>
+                <element name="test_000000.fastq" file="test_0.fastq" ftype="fastq"/>
+                <element name="test_000001.fastq" file="test_1.fastq" ftype="fastq"/>
             </output_collection>
         </test>
         <test>
@@ -261,10 +295,10 @@
             <param name="numnew" value="2"/>
             <param name="newfilenames" value="rand"/>
             <param name="allocate" value="random"/>
-            <param name="seed" value="1010"/> 
+            <param name="seed" value="1010"/>
             <output_collection name="list_output_fasta" type="list">
-                <element name="rand_0.fasta" file="rand_0.fasta" ftype="fasta"/>
-                <element name="rand_1.fasta" file="rand_1.fasta" ftype="fasta"/>
+                <element name="rand_000000.fasta" file="rand_0.fasta" ftype="fasta"/>
+                <element name="rand_000001.fasta" file="rand_1.fasta" ftype="fasta"/>
             </output_collection>
         </test>
         <test>
@@ -274,18 +308,18 @@
             <param name="newfilenames" value="fasta_batch"/>
             <param name="allocate" value="batch"/>
             <output_collection name="list_output_fasta" type="list">
-                <element name="fasta_batch_0.fasta" file="fasta_batch_0.fasta" ftype="fasta"/>
-                <element name="fasta_batch_1.fasta" file="fasta_batch_1.fasta" ftype="fasta"/>
+                <element name="fasta_batch_000000.fasta" file="fasta_batch_0.fasta" ftype="fasta"/>
+                <element name="fasta_batch_000001.fasta" file="fasta_batch_1.fasta" ftype="fasta"/>
             </output_collection>
-        </test> 
+        </test>
         <test>
             <param name="input" value="test.tabular" ftype="txt"/>
             <param name="select_ftype" value="txt"/>
             <param name="numnew" value="2"/>
-            <param name="newfilenames" value="test"/> 
+            <param name="newfilenames" value="test"/>
             <output_collection name="list_output_txt" type="list">
-                <element name="test_0.txt" file="test_0.tabular" ftype="txt" lines_diff="1"/>
-                <element name="test_1.txt" file="test_1.tabular" ftype="txt" lines_diff="1"/>
+                <element name="test_000000.txt" file="test_0.tabular" ftype="txt" lines_diff="1"/>
+                <element name="test_000001.txt" file="test_1.tabular" ftype="txt" lines_diff="1"/>
             </output_collection>
         </test>
         <test>
@@ -293,10 +327,10 @@
             <param name="select_ftype" value="generic"/>
             <param name="generic_regex" value="^.*"/>
             <param name="numnew" value="2"/>
-            <param name="newfilenames" value="test"/> 
+            <param name="newfilenames" value="test"/>
             <output_collection name="list_output_generic" type="list">
-                <element name="test_0" file="test_0.tabular" ftype="txt" lines_diff="1"/>
-                <element name="test_1" file="test_1.tabular" ftype="txt" lines_diff="1"/>
+                <element name="test_000000" file="test_0.tabular" ftype="txt" lines_diff="1"/>
+                <element name="test_000001" file="test_1.tabular" ftype="txt" lines_diff="1"/>
             </output_collection>
         </test>
         <test>
@@ -306,10 +340,10 @@
             <param name="numnew" value="2"/>
             <param name="newfilenames" value="rand"/>
             <param name="allocate" value="random"/>
-            <param name="seed" value="1010"/> 
+            <param name="seed" value="1010"/>
             <output_collection name="list_output_generic" type="list">
-                <element name="rand_0" file="rand_0.fasta" ftype="fasta"/>
-                <element name="rand_1" file="rand_1.fasta" ftype="fasta"/>
+                <element name="rand_000000" file="rand_0.fasta" ftype="fasta"/>
+                <element name="rand_000001" file="rand_1.fasta" ftype="fasta"/>
             </output_collection>
         </test>
         <test>
@@ -320,27 +354,27 @@
             <param name="newfilenames" value="mol"/>
             <param name="allocate" value="batch"/>
             <output_collection name="list_output_generic" type="list">
-                <element name="mol_0" file="mol_0.sdf" ftype="sdf"/>
-                <element name="mol_1" file="mol_1.sdf" ftype="sdf"/>
-                <element name="mol_2" file="mol_2.sdf" ftype="sdf"/>
+                <element name="mol_000000" file="mol_0.sdf" ftype="sdf"/>
+                <element name="mol_000001" file="mol_1.sdf" ftype="sdf"/>
+                <element name="mol_000002" file="mol_2.sdf" ftype="sdf"/>
             </output_collection>
         </test>
     </tests>
     <help><![CDATA[
 **Split file into a dataset collection**
 
-This tool splits a data sets consisting of records into multiple data sets within a collection. 
+This tool splits a data set consisting of records into multiple data sets within a collection.
 A record can be for instance simply a line, a FASTA sequence (header + sequence), a FASTQ sequence
 (headers + sequence + qualities), etc. The important property is that the beginning of a new record
-can be speciefied by a regular expression, e.g. ".*" for lines, ">.*" for FASTA, or "@.*" for FASTQ. 
-The tool has presets for text, tabular data sets (which are split by line), FASTA, FASTQ, and MGF. 
-For other data types the text delimiting records can be specified manually using the generic splitter. 
+can be specified by a regular expression, e.g. ".*" for lines, ">.*" for FASTA, or "@.*" for FASTQ.
+The tool has presets for text, tabular data sets (which are split by line), FASTA, FASTQ, and MGF.
+For other data types the text delimiting records can be specified manually using the generic splitter.
 
-If splitting by line (or by some other item, like a FASTA entry or an MGF record, the splitting can be either done alternating, in original record order, or at random. 
+If splitting by line (or by some other item, like a FASTA entry or an MGF record), the splitting can be done either alternating, in original record order, or at random.
 
 If t records are to be distributed to n new data sets, then the i-th record goes to data set
 
-* floor(i / t * n) (for batch), 
+* floor(i / t * n) (for batch),
 * i % n (for alternating), or
 * a random data set
 
@@ -368,11 +402,11 @@
 4 2   1   1
 = === === ====
 
-Note that there are no guarantees when splitting at random that every result file will be non-empty, so downstream tools should be able to gracefully handle empty files. 
+Note that there are no guarantees when splitting at random that every result file will be non-empty, so downstream tools should be able to gracefully handle empty files.
 
 If a tabular file is used as input, you may choose to split by line or by column. If split by column, a new file is created for each unique value in the column.
 In addition, (Python) regular expressions may be used to transform the value in the column to a new value. Caution should be used with this feature, as it could transform all values to the same value, or cause other unexpected behavior.
-The default regular expression uses each value in the column without modifying it. 
+The default regular expression uses each value in the column without modifying it.
     ]]></help>
     <citations>
         <citation type="bibtex">
author	bgruening
date	Tue, 10 Sep 2019 12:31:15 -0400
parents	d150ac3d853d
children	0850f2dfba13