diff split_file_on_column.xml @ 5:d4b5b70e82cb draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit df81dd26ed1cf67a0d95b9614738b1d59667773f
author bgruening
date Mon, 04 Jul 2022 12:26:46 +0000
parents 37a53100b67e
children ff2a81aa3f08
line wrap: on
line diff
--- a/split_file_on_column.xml	Thu Feb 25 15:54:13 2021 +0000
+++ b/split_file_on_column.xml	Mon Jul 04 12:26:46 2022 +0000
@@ -1,5 +1,4 @@
-<tool id="tp_split_on_column" name="Split file" version="0.4">
-    <description>according to the values of a column</description>
+<tool id="tp_split_on_column" name="Split by group" version="0.5">
     <requirements>
         <requirement type="package" version="5.0.1">gawk</requirement>
     </requirements>
@@ -14,14 +13,14 @@
 ]]>
     </command>
     <inputs>
-        <param format="tabular" name="infile" type="data" label="File to select" />
+        <param format="tabular" name="infile" type="data" label="File to split" />
         <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" />
 
-        <param name="include_header" type="boolean" label="Include the header in all splitted files?"
-               help="Include the first line (the assumed header line) in all splitted files." />
+        <param name="include_header" type="boolean" label="Include header in splits?"
+               help="Include the first line (the assumed header line) in all split files." />
     </inputs>
     <outputs>
-        <collection name="split_output" type="list" label="Table split on first column">
+        <collection name="split_output" type="list" label="Split by group collection">
             <discover_datasets pattern="__name_and_ext__" directory="tmp_out" />
         </collection>
     </outputs>
@@ -85,11 +84,19 @@
     <help>
 <![CDATA[
 
-**What it does**
+========
+Synopsis
+========
+
+Given a single input dataset this tool splits the file on unique values from a specified column.
 
-This tool splits a file into different smaller files using a specific column.
-It will work like the group tool, but every group is saved to its own file.
-You have the option to include the header (first line) in all splitted files.
+===========
+Description
+===========
+
+This tool splits a file into a collection based on unique values of a specific column.
+It performs a grouping operation with every group saved as a separate collection element.
+You have the option to include the header (first line) in all splits.
 If you have a header and don't want to keep it, please remove it before you use this tool.
 For example with the "Remove beginning of a file" tool.
 
@@ -97,23 +104,28 @@
 
 **Example**
 
-Splitting a file without header on column 5 from this::
+Splitting this file on column 1::
 
-    chr7  56632  56652  cluster 1
-    chr7  56736  56756  cluster 1
-    chr7  56761  56781  cluster 2
-    chr7  56772  56792  cluster 2
-    chr7  56775  56795  cluster 2
-
-will produce 2 files with different clusters::
-
-    chr7  56632  56652  cluster 1
-    chr7  56736  56756  cluster 1
+    chr1 10 20
+    chr1 30 40
+    chr2 40 70
+    chr4 60 80
 
 
-    chr7  56761  56781  cluster 2
-    chr7  56772  56792  cluster 2
-    chr7  56775  56795  cluster 2
+will produce a collection with 3 elements::
+
+    chr1 10 20
+    chr1 30 40
+
+    chr2 40 70
+
+    chr4 60 80
+
+------
+
+.. image:: $PATH_TO_IMAGES/split_by_group.svg
+  :width: 800
+  :alt: Split by group
 
 ]]>
     </help>