Repository 'split_file_on_column'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/split_file_on_column

Changeset 6:ff2a81aa3f08 (2022-07-19)
Previous changeset 5:d4b5b70e82cb (2022-07-04)
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit 6a2deb2f38472a2845123bd54e73b6bd115b3a0b
modified:
split_file_on_column.xml
added:
test-data/5cols-unsorted-with-header.tabular
test-data/5cols-unsorted.tabular
diff -r d4b5b70e82cb -r ff2a81aa3f08 split_file_on_column.xml
--- a/split_file_on_column.xml Mon Jul 04 12:26:46 2022 +0000
+++ b/split_file_on_column.xml Tue Jul 19 13:25:20 2022 +0000
@@ -1,14 +1,14 @@
-<tool id="tp_split_on_column" name="Split by group" version="0.5">
+<tool id="tp_split_on_column" name="Split by group" version="0.6">
     <requirements>
-        <requirement type="package" version="5.0.1">gawk</requirement>
+        <requirement type="package" version="5.1.0">gawk</requirement>
     </requirements>
     <command>
 <![CDATA[
     mkdir tmp_out &&
     #if $include_header
-        awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext";print hdr>f} {print >> f}' $infile
+        awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext"}; {if (!seen[f]++) print hdr>f; print >> f}' $infile
     #else
-        awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile'
+        awk -F'\t' '{print >> "tmp_out/"\$$column".$infile.ext" }' '$infile'
     #end if
 ]]>
     </command>
@@ -80,6 +80,43 @@
                 </element>
             </output_collection>
         </test>
+        <test><!-- test with unsorted column, no header -->
+            <param name="infile" value="5cols-unsorted.tabular" ftype="tabular" />
+            <param name="column" value="5" />
+            <param name="include_header" value="false"/>
+            <output_collection name="split_output" type="list">
+                <element name="1">
+                    <assert_contents>
+                        <has_n_lines n="3" />
+                    </assert_contents>
+                </element>
+                <element name="2">
+                    <assert_contents>
+                        <has_n_lines n="2" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test><!-- test with unsorted column, with header -->
+            <param name="infile" value="5cols-unsorted-with-header.tabular" ftype="tabular" />
+            <param name="column" value="5" />
+            <param name="include_header" value="true"/>
+            <output_collection name="split_output" type="list">
+                <element name="1">
+                    <assert_contents>
+                        <has_n_lines n="4" />
+                        <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" />
+
+                    </assert_contents>
+                </element>
+                <element name="2">
+                    <assert_contents>
+                        <has_n_lines n="3" />
+                        <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
     </tests>
     <help>
 <![CDATA[
@@ -112,7 +149,7 @@
     chr4 60 80
 
 
-will produce a collectiion with 4 elements::
+will produce a collection with 3 elements::
 
     chr1 10 20
     chr1 30 40
diff -r d4b5b70e82cb -r ff2a81aa3f08 test-data/5cols-unsorted-with-header.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/5cols-unsorted-with-header.tabular Tue Jul 19 13:25:20 2022 +0000
@@ -0,0 +1,6 @@
+Column1	Column2	Column3	Column4	Column5
+chr7	56632	56652	cluster	1
+chr7	56736	56756	cluster	2
+chr7	56761	56781	cluster	1
+chr7	56772	56792	cluster	1
+chr7	56775	56795	cluster	2
diff -r d4b5b70e82cb -r ff2a81aa3f08 test-data/5cols-unsorted.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/5cols-unsorted.tabular Tue Jul 19 13:25:20 2022 +0000
@@ -0,0 +1,5 @@
+chr7	56632	56652	cluster	1
+chr7	56736	56756	cluster	2
+chr7	56761	56781	cluster	1
+chr7	56772	56792	cluster	1
+chr7	56775	56795	cluster	2