Mercurial > repos > bgruening > split_file_to_collection
changeset 10:2dae863c8f42 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 5d21f3d6a3a84b4737a2091ae0d772471eb389dd
author | bgruening |
---|---|
date | Thu, 23 May 2024 15:03:47 +0000 |
parents | baabc30154cd |
children | |
files | split_file_to_collection.py split_file_to_collection.xml test-data/split_file_0.tabular test-data/split_file_1.tabular test-data/split_file_2.tabular test-data/split_file_3.tabular |
diffstat | 6 files changed, 56 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/split_file_to_collection.py Thu Nov 23 20:02:01 2023 +0000
+++ b/split_file_to_collection.py Thu May 23 15:03:47 2024 +0000
@@ -329,6 +329,8 @@
 else:
 record += line
 # after loop, write final record to file
+ if new_file_counter in fresh_files:
+ new_file.write(header)
 new_file.write(record)
 new_file.close()
--- a/split_file_to_collection.xml Thu Nov 23 20:02:01 2023 +0000 +++ b/split_file_to_collection.xml Thu May 23 15:03:47 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="split_file_to_collection" name="Split file" version="0.5.1"> +<tool id="split_file_to_collection" name="Split file" version="0.5.2"> <description>to dataset collection</description> <macros> <xml name="regex_sanitizer"> @@ -190,38 +190,38 @@ </conditional> </inputs> <outputs> - <collection name="list_output_tab" type="list" label="${tool.name} on ${on_string}"> + <collection name="list_output_tab" type="list" label="${tool.name} on ${on_string}: tabular"> <discover_datasets pattern="__name__" directory="out" visible="false" format="tabular"/> <filter>split_parms['select_ftype'] == "tabular"</filter> </collection> - <collection name="list_output_mgf" type="list" label="${tool.name} on ${on_string}"> + <collection name="list_output_mgf" type="list" label="${tool.name} on ${on_string}: mgf"> <discover_datasets pattern="__name__" directory="out" visible="false" format="mgf"/> <filter>split_parms['select_ftype'] == "mgf"</filter> </collection> - <collection name="list_output_fasta" type="list" label="${tool.name} on ${on_string}"> + <collection name="list_output_fasta" type="list" label="${tool.name} on ${on_string}: fasta"> <discover_datasets pattern="__name__" directory="out" visible="false" format="fasta"/> <filter>split_parms['select_ftype'] == "fasta"</filter> </collection> - <collection name="list_output_fastq" type="list" label="${tool.name} on ${on_string}"> + <collection name="list_output_fastq" type="list" label="${tool.name} on ${on_string}: fastq"> <discover_datasets pattern="__name__" directory="out" visible="false" format="fastq"/> <filter>split_parms['select_ftype'] == "fastq"</filter> </collection> - <collection name="list_output_sdf" type="list" label="${tool.name} on ${on_string}"> + <collection name="list_output_sdf" type="list" label="${tool.name} on ${on_string}: sdf"> <discover_datasets 
pattern="__name__" directory="out" visible="false" format="sdf"/> <filter>split_parms['select_ftype'] == "sdf"</filter> </collection> - <collection name="list_output_txt" type="list" label="${tool.name} on ${on_string}"> + <collection name="list_output_txt" type="list" label="${tool.name} on ${on_string}: txt"> <discover_datasets pattern="__name__" directory="out" visible="false" format="txt"/> <filter>split_parms['select_ftype'] == "txt"</filter> </collection> - <collection name="list_output_generic" type="list" label="${tool.name} on ${on_string}"> + <collection name="list_output_generic" type="list" label="${tool.name} on ${on_string}: generic"> <discover_datasets pattern="__name_and_ext__" directory="out" visible="false"/> <filter>split_parms['select_ftype'] == "generic"</filter> </collection> </outputs> <tests> <!-- 1 --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.tabular" ftype="tabular"/> <param name="select_ftype" value="tabular"/> <param name="select_split_by" value="col"/> @@ -236,7 +236,7 @@ </output_collection> </test> <!-- 2 --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.tabular" ftype="tabular"/> <param name="select_ftype" value="tabular"/> <param name="select_split_by" value="row"/> @@ -250,7 +250,7 @@ </output_collection> </test> <!-- 3 --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.tabular" ftype="tabular"/> <param name="select_ftype" value="tabular"/> <param name="select_split_by" value="row"/> @@ -265,7 +265,7 @@ </output_collection> </test> <!-- 4 --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.tabular" ftype="tabular"/> <param name="select_ftype" value="tabular"/> <param name="select_split_by" value="row"/> @@ -280,7 +280,7 @@ </output_collection> </test> <!-- 5 --> - <test> + <test expect_num_outputs="1"> <param name="select_ftype" value="txt"/> <param name="input" value="karyotype.txt" ftype="txt"/> <param name="mode" 
value="numnew"/> @@ -316,7 +316,7 @@ </output_collection> </test> <!-- 6 --> - <test> + <test expect_num_outputs="1"> <param name="input" value="psm.tabular" ftype="tabular"/> <param name="select_ftype" value="tabular"/> <param name="select_split_by" value="col"/> @@ -332,7 +332,7 @@ </output_collection> </test> <!-- 7 splitting of mgf --> - <test> + <test expect_num_outputs="1"> <param name="input" value="demo758Dacentroid.mgf" ftype="mgf"/> <param name="select_ftype" value="mgf"/> <param name="mode" value="numnew"/> @@ -345,7 +345,7 @@ </output_collection> </test> <!-- 8 splitting of fasta + desired number of files--> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.fasta" ftype="fasta"/> <param name="select_ftype" value="fasta"/> <param name="mode" value="numnew"/> @@ -357,7 +357,7 @@ </output_collection> </test> <!-- 9 splitting of fasta + desired chunksize --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.fasta" ftype="fasta"/> <param name="select_ftype" value="fasta"/> <param name="mode" value="chunk"/> @@ -369,7 +369,7 @@ </output_collection> </test> <!-- 10 splitting of fastq, specify desired number of files --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.fastq" ftype="fastq"/> <param name="select_ftype" value="fastq"/> <param name="mode" value="numnew"/> @@ -383,7 +383,7 @@ <!-- 11 splitting of fastq, specify desired number of files same as previous test, but by specifying the number of lines per record explicitely (not using the preset of the python script) --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.fastq" ftype="fastq"/> <param name="select_ftype" value="generic"/> <param name="select_split_method" value="number"/> @@ -397,7 +397,7 @@ </output_collection> </test> <!-- splitting of fasta w random assignment and specific filename prefix --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.fasta" ftype="fasta"/> <param 
name="select_ftype" value="fasta"/> <param name="mode" value="numnew"/> @@ -411,7 +411,7 @@ </output_collection> </test> <!-- splitting of fasta w batch assignment and specific filename prefix --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.fasta" ftype="fasta"/> <param name="select_ftype" value="fasta"/> <param name="mode" value="numnew"/> @@ -424,7 +424,7 @@ </output_collection> </test> <!-- splitting of txt w default (alternating assignment) --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.tabular" ftype="txt"/> <param name="select_ftype" value="txt"/> <param name="mode" value="numnew"/> @@ -436,7 +436,7 @@ </output_collection> </test> <!-- generic-regex splitting (of txt) w default assignement (alternating) --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.tabular" ftype="txt"/> <param name="select_ftype" value="generic"/> <param name="select_split_method" value="regex"/> @@ -450,7 +450,7 @@ </output_collection> </test> <!-- generic-regex splitting (of a fasta) w random assignment --> - <test> + <test expect_num_outputs="1"> <param name="input" value="test.fasta" ftype="fasta"/> <param name="select_ftype" value="generic"/> <param name="select_split_method" value="regex"/> @@ -466,7 +466,7 @@ </output_collection> </test> <!-- sdf + specify desired number of files --> - <test> + <test expect_num_outputs="1"> <param name="input" value="3_molecules.sdf" ftype="sdf"/> <param name="select_ftype" value="sdf"/> <param name="mode" value="numnew"/> @@ -480,7 +480,7 @@ </output_collection> </test> <!-- sdf + specify desired number of records per file (chunksize) --> - <test> + <test expect_num_outputs="1"> <param name="input" value="3_molecules.sdf" ftype="sdf"/> <param name="select_ftype" value="sdf"/> <param name="mode" value="chunk"/> @@ -494,7 +494,7 @@ </output_collection> </test> <!-- test split_after (by splitting fasta files after non-header lines) --> - <test> + <test 
expect_num_outputs="1"> <param name="input" value="test.fasta" ftype="fasta"/> <param name="select_ftype" value="generic"/> <param name="select_split_method" value="regex"/> @@ -510,6 +510,21 @@ <element name="rand_000001" file="rand_1.fasta" ftype="fasta"/> </output_collection> </test> + <test expect_num_outputs="1"> + <param name="input" value="test.tabular" ftype="tabular"/> + <param name="select_ftype" value="tabular"/> + <param name="select_split_by" value="row"/> + <param name="top" value="2"/> + <param name="mode" value="chunk"/> + <param name="chunksize" value="1"/> + <param name="allocate" value="batch"/> + <output_collection name="list_output_tab" type="list"> + <element name="split_file_000000.tabular" file="split_file_0.tabular" ftype="tabular"/> + <element name="split_file_000001.tabular" file="split_file_1.tabular" ftype="tabular"/> + <element name="split_file_000002.tabular" file="split_file_2.tabular" ftype="tabular"/> + <element name="split_file_000003.tabular" file="split_file_3.tabular" ftype="tabular"/> + </output_collection> + </test> </tests> <help><![CDATA[ **Split file into a dataset collection**
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_file_0.tabular Thu May 23 15:03:47 2024 +0000
@@ -0,0 +1,3 @@
+#This is a file
+#file data
+foo.mgf bar
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_file_1.tabular Thu May 23 15:03:47 2024 +0000
@@ -0,0 +1,3 @@
+#This is a file
+#file data
+foo2.mgf bar2