# HG changeset patch # User bgruening # Date 1614268453 0 # Node ID 37a53100b67ebcb9bd5f455fe7af0eae11c1d7cb # Parent b60f2452580e92ac092e40fd205685cf500d7601 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606" diff -r b60f2452580e -r 37a53100b67e split_file_on_column.tar.gz Binary file split_file_on_column.tar.gz has changed diff -r b60f2452580e -r 37a53100b67e split_file_on_column.xml --- a/split_file_on_column.xml Wed Dec 23 03:50:48 2015 -0500 +++ b/split_file_on_column.xml Thu Feb 25 15:54:13 2021 +0000 @@ -1,17 +1,24 @@ - + according to the values of a column - gnu_awk + gawk "tmp_out/"\$$column".$infile.ext" }' $infile + mkdir tmp_out && + #if $include_header + awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext";print hdr>f} {print >> f}' $infile + #else + awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile' + #end if ]]> + + @@ -22,6 +29,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -43,12 +89,15 @@ This tool splits a file into different smaller files using a specific column. It will work like the group tool, but every group is saved to its own file. +You have the option to include the header (first line) in all split files. +If you have a header and don't want to keep it, please remove it before you use this tool. +For example, with the "Remove beginning of a file" tool. 
----- **Example** -Splitting on column 5 from this:: +Splitting a file without a header on column 5 from this:: chr7 56632 56652 cluster 1 chr7 56736 56756 cluster 1 @@ -66,7 +115,19 @@ chr7 56772 56792 cluster 2 chr7 56775 56795 cluster 2 - ]]> + + +@misc{githubsplit_file_on_column, + author = {Gruening, Bjoern}, + year = {2015}, + title = {split_file_on_column}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/bgruening/galaxytools}, + } + + + diff -r b60f2452580e -r 37a53100b67e test-data/5cols-with-header.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/5cols-with-header.tabular Thu Feb 25 15:54:13 2021 +0000 @@ -0,0 +1,6 @@ +Column1 Column2 Column3 Column4 Column5 +chr7 56632 56652 cluster 1 +chr7 56736 56756 cluster 1 +chr7 56761 56781 cluster 2 +chr7 56772 56792 cluster 2 +chr7 56775 56795 cluster 2 diff -r b60f2452580e -r 37a53100b67e test-data/5cols.tabular --- a/test-data/5cols.tabular Wed Dec 23 03:50:48 2015 -0500 +++ b/test-data/5cols.tabular Thu Feb 25 15:54:13 2021 +0000 @@ -1,5 +1,5 @@ chr7 56632 56652 cluster 1 -chr7 56736 56756 cluster 1 +chr7 56736 56756 cluster 1 chr7 56761 56781 cluster 2 chr7 56772 56792 cluster 2 chr7 56775 56795 cluster 2 diff -r b60f2452580e -r 37a53100b67e tool_dependencies.xml --- a/tool_dependencies.xml Wed Dec 23 03:50:48 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ - - - - - -