Mercurial > repos > bgruening > split_file_on_column
changeset 6:ff2a81aa3f08 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit 6a2deb2f38472a2845123bd54e73b6bd115b3a0b
author | bgruening |
---|---|
date | Tue, 19 Jul 2022 13:25:20 +0000 |
parents | d4b5b70e82cb |
children | |
files | split_file_on_column.xml test-data/5cols-unsorted-with-header.tabular test-data/5cols-unsorted.tabular |
diffstat | 3 files changed, 53 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/split_file_on_column.xml Mon Jul 04 12:26:46 2022 +0000 +++ b/split_file_on_column.xml Tue Jul 19 13:25:20 2022 +0000 @@ -1,14 +1,14 @@ -<tool id="tp_split_on_column" name="Split by group" version="0.5"> +<tool id="tp_split_on_column" name="Split by group" version="0.6"> <requirements> - <requirement type="package" version="5.0.1">gawk</requirement> + <requirement type="package" version="5.1.0">gawk</requirement> </requirements> <command> <![CDATA[ mkdir tmp_out && #if $include_header - awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext";print hdr>f} {print >> f}' $infile + awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext"}; {if (!seen[f]++) print hdr>f; print >> f}' $infile #else - awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile' + awk -F'\t' '{print >> "tmp_out/"\$$column".$infile.ext" }' '$infile' #end if ]]> </command> @@ -80,6 +80,43 @@ </element> </output_collection> </test> + <test><!-- test with unsorted column, no header --> + <param name="infile" value="5cols-unsorted.tabular" ftype="tabular" /> + <param name="column" value="5" /> + <param name="include_header" value="false"/> + <output_collection name="split_output" type="list"> + <element name="1"> + <assert_contents> + <has_n_lines n="3" /> + </assert_contents> + </element> + <element name="2"> + <assert_contents> + <has_n_lines n="2" /> + </assert_contents> + </element> + </output_collection> + </test> + <test><!-- test with unsorted column, with header --> + <param name="infile" value="5cols-unsorted-with-header.tabular" ftype="tabular" /> + <param name="column" value="5" /> + <param name="include_header" value="true"/> + <output_collection name="split_output" type="list"> + <element name="1"> + <assert_contents> + <has_n_lines n="4" /> + <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" /> + + </assert_contents> + </element> + <element name="2"> + <assert_contents> + <has_n_lines n="3" /> + <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" /> + </assert_contents> + </element> + </output_collection> + </test> </tests> <help> <![CDATA[ @@ -112,7 +149,7 @@ chr4 60 80 -will produce a collectiion with 4 elements:: +will produce a collection with 3 elements:: chr1 10 20 chr1 30 40
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/5cols-unsorted-with-header.tabular Tue Jul 19 13:25:20 2022 +0000 @@ -0,0 +1,6 @@ +Column1 Column2 Column3 Column4 Column5 +chr7 56632 56652 cluster 1 +chr7 56736 56756 cluster 2 +chr7 56761 56781 cluster 1 +chr7 56772 56792 cluster 1 +chr7 56775 56795 cluster 2