Repository 'split_file_to_collection'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/split_file_to_collection

Changeset 10:2dae863c8f42 (2024-05-23)
Previous changeset 9:baabc30154cd (2023-11-23)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 5d21f3d6a3a84b4737a2091ae0d772471eb389dd
modified:
split_file_to_collection.py
split_file_to_collection.xml
added:
test-data/split_file_0.tabular
test-data/split_file_1.tabular
test-data/split_file_2.tabular
test-data/split_file_3.tabular
b
diff -r baabc30154cd -r 2dae863c8f42 split_file_to_collection.py
--- a/split_file_to_collection.py Thu Nov 23 20:02:01 2023 +0000
+++ b/split_file_to_collection.py Thu May 23 15:03:47 2024 +0000
b
@@ -329,6 +329,8 @@
             else:
                 record += line
         # after loop, write final record to file
+        if new_file_counter in fresh_files:
+            new_file.write(header)
         new_file.write(record)
         new_file.close()
 
b
diff -r baabc30154cd -r 2dae863c8f42 split_file_to_collection.xml
--- a/split_file_to_collection.xml Thu Nov 23 20:02:01 2023 +0000
+++ b/split_file_to_collection.xml Thu May 23 15:03:47 2024 +0000
[
b'@@ -1,4 +1,4 @@\n-<tool id="split_file_to_collection" name="Split file" version="0.5.1">\n+<tool id="split_file_to_collection" name="Split file" version="0.5.2">\n     <description>to dataset collection</description>\n     <macros>\n         <xml name="regex_sanitizer">\n@@ -190,38 +190,38 @@\n         </conditional>\n     </inputs>\n     <outputs>\n-        <collection name="list_output_tab" type="list" label="${tool.name} on ${on_string}">\n+        <collection name="list_output_tab" type="list" label="${tool.name} on ${on_string}: tabular">\n             <discover_datasets pattern="__name__" directory="out" visible="false" format="tabular"/>\n             <filter>split_parms[\'select_ftype\'] == "tabular"</filter>\n         </collection>\n-        <collection name="list_output_mgf" type="list" label="${tool.name} on ${on_string}">\n+        <collection name="list_output_mgf" type="list" label="${tool.name} on ${on_string}: mgf">\n             <discover_datasets pattern="__name__" directory="out" visible="false" format="mgf"/>\n             <filter>split_parms[\'select_ftype\'] == "mgf"</filter>\n         </collection>\n-        <collection name="list_output_fasta" type="list" label="${tool.name} on ${on_string}">\n+        <collection name="list_output_fasta" type="list" label="${tool.name} on ${on_string}: fasta">\n             <discover_datasets pattern="__name__" directory="out" visible="false" format="fasta"/>\n             <filter>split_parms[\'select_ftype\'] == "fasta"</filter>\n         </collection>\n-        <collection name="list_output_fastq" type="list" label="${tool.name} on ${on_string}">\n+        <collection name="list_output_fastq" type="list" label="${tool.name} on ${on_string}: fastq">\n             <discover_datasets pattern="__name__" directory="out" visible="false" format="fastq"/>\n             <filter>split_parms[\'select_ftype\'] == "fastq"</filter>\n         </collection>\n-        <collection name="list_output_sdf" type="list" label="${tool.name} on ${on_string}">\n+        <collection name="list_output_sdf" type="list" label="${tool.name} on ${on_string}: sdf">\n             <discover_datasets pattern="__name__" directory="out" visible="false" format="sdf"/>\n             <filter>split_parms[\'select_ftype\'] == "sdf"</filter>\n         </collection>\n-        <collection name="list_output_txt" type="list" label="${tool.name} on ${on_string}">\n+        <collection name="list_output_txt" type="list" label="${tool.name} on ${on_string}: txt">\n             <discover_datasets pattern="__name__" directory="out" visible="false" format="txt"/>\n             <filter>split_parms[\'select_ftype\'] == "txt"</filter>\n         </collection>\n-        <collection name="list_output_generic" type="list" label="${tool.name} on ${on_string}">\n+        <collection name="list_output_generic" type="list" label="${tool.name} on ${on_string}: generic">\n             <discover_datasets pattern="__name_and_ext__" directory="out" visible="false"/>\n             <filter>split_parms[\'select_ftype\'] == "generic"</filter>\n         </collection>\n     </outputs>\n     <tests>\n         <!-- 1 -->\n-        <test>\n+        <test expect_num_outputs="1">\n             <param name="input" value="test.tabular" ftype="tabular"/>\n             <param name="select_ftype" value="tabular"/>\n             <param name="select_split_by" value="col"/>\n@@ -236,7 +236,7 @@\n             </output_collection>\n         </test>\n         <!-- 2 -->\n-        <test>\n+        <test expect_num_outputs="1">\n             <param name="input" value="test.tabular" ftype="tabular"/>\n             <param name="select_ftype" value="tabular"/>\n             <param name="select_split_by" value="row"/>\n@@ -250,7 +250,7 @@\n             </output_collection>\n         </test>\n         <!-- 3 -->\n-        <test>\n+        <test expect_num_outputs="1">\n             <param name="input" value="test.tabular" ftype="tabular"/>\n             <param name="select_ftype" value="tabular"/>\n             <param name="select_split_by" valu'..b'="test.fasta" ftype="fasta"/>\n             <param name="select_ftype" value="fasta"/>\n             <param name="mode" value="numnew"/>\n@@ -411,7 +411,7 @@\n             </output_collection>\n         </test>\n         <!-- splitting of fasta w batch assignment and specific filename prefix -->\n-        <test>\n+        <test expect_num_outputs="1">\n             <param name="input" value="test.fasta" ftype="fasta"/>\n             <param name="select_ftype" value="fasta"/>\n             <param name="mode" value="numnew"/>\n@@ -424,7 +424,7 @@\n             </output_collection>\n         </test>\n         <!-- splitting of txt w default (alternating assignment) -->\n-        <test>\n+        <test expect_num_outputs="1">\n             <param name="input" value="test.tabular" ftype="txt"/>\n             <param name="select_ftype" value="txt"/>\n             <param name="mode" value="numnew"/>\n@@ -436,7 +436,7 @@\n             </output_collection>\n         </test>\n         <!-- generic-regex splitting (of txt) w default assignement (alternating) -->\n-        <test>\n+        <test expect_num_outputs="1">\n             <param name="input" value="test.tabular" ftype="txt"/>\n             <param name="select_ftype" value="generic"/>\n             <param name="select_split_method" value="regex"/>\n@@ -450,7 +450,7 @@\n             </output_collection>\n         </test>\n         <!-- generic-regex splitting (of a fasta) w random assignment -->\n-        <test>\n+        <test expect_num_outputs="1">\n             <param name="input" value="test.fasta" ftype="fasta"/>\n             <param name="select_ftype" value="generic"/>\n             <param name="select_split_method" value="regex"/>\n@@ -466,7 +466,7 @@\n             </output_collection>\n         </test>\n         <!-- sdf + specify desired number of files -->\n-        <test>\n+        <test expect_num_outputs="1">\n             <param name="input" value="3_molecules.sdf" ftype="sdf"/>\n             <param name="select_ftype" value="sdf"/>\n             <param name="mode" value="numnew"/>\n@@ -480,7 +480,7 @@\n             </output_collection>\n         </test>\n         <!-- sdf + specify desired number of records per file (chunksize) -->\n-        <test>\n+        <test expect_num_outputs="1">\n             <param name="input" value="3_molecules.sdf" ftype="sdf"/>\n             <param name="select_ftype" value="sdf"/>\n             <param name="mode" value="chunk"/>\n@@ -494,7 +494,7 @@\n             </output_collection>\n         </test>\n         <!-- test split_after (by splitting fasta files after non-header lines) -->\n-        <test>\n+        <test expect_num_outputs="1">\n             <param name="input" value="test.fasta" ftype="fasta"/>\n             <param name="select_ftype" value="generic"/>\n             <param name="select_split_method" value="regex"/>\n@@ -510,6 +510,21 @@\n                 <element name="rand_000001" file="rand_1.fasta" ftype="fasta"/>\n             </output_collection>\n         </test>\n+        <test expect_num_outputs="1">\n+            <param name="input" value="test.tabular" ftype="tabular"/>\n+            <param name="select_ftype" value="tabular"/>\n+            <param name="select_split_by" value="row"/>\n+            <param name="top" value="2"/>\n+            <param name="mode" value="chunk"/>\n+            <param name="chunksize" value="1"/>\n+            <param name="allocate" value="batch"/>\n+            <output_collection name="list_output_tab" type="list">\n+                <element name="split_file_000000.tabular" file="split_file_0.tabular" ftype="tabular"/>\n+                <element name="split_file_000001.tabular" file="split_file_1.tabular" ftype="tabular"/>\n+                <element name="split_file_000002.tabular" file="split_file_2.tabular" ftype="tabular"/>\n+                <element name="split_file_000003.tabular" file="split_file_3.tabular" ftype="tabular"/>\n+            </output_collection>\n+        </test>\n     </tests>\n     <help><![CDATA[\n **Split file into a dataset collection**\n'
b
diff -r baabc30154cd -r 2dae863c8f42 test-data/split_file_0.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_file_0.tabular Thu May 23 15:03:47 2024 +0000
b
@@ -0,0 +1,3 @@
+#This is a file
+#file   data
+foo.mgf bar
b
diff -r baabc30154cd -r 2dae863c8f42 test-data/split_file_1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_file_1.tabular Thu May 23 15:03:47 2024 +0000
b
@@ -0,0 +1,3 @@
+#This is a file
+#file   data
+foo2.mgf bar2
b
diff -r baabc30154cd -r 2dae863c8f42 test-data/split_file_2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_file_2.tabular Thu May 23 15:03:47 2024 +0000
b
@@ -0,0 +1,3 @@
+#This is a file
+#file   data
+foo3.mgf bar3
b
diff -r baabc30154cd -r 2dae863c8f42 test-data/split_file_3.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_file_3.tabular Thu May 23 15:03:47 2024 +0000
b
@@ -0,0 +1,3 @@
+#This is a file
+#file   data
+foo.mgf bar4