datamash_ops: datamash-ops.xml comparison

comparison datamash-ops.xml @ 3:419027d822d6 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/datamash commit 206ac70388ea11b168366f1e30bb44157e371c6e"

author	iuc
date	Sun, 10 Apr 2022 11:41:19 +0000
parents	562f3c677828
children	746e8e4bf929

comparison

equal deleted inserted replaced

-:562f3c677828
+:419027d822d6
-<?xml version="1.0"?>
+<tool id="datamash_ops" name="Datamash" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
-<tool id="datamash_ops" name="Datamash" version="@WRAPPER_VERSION@">
 <description>(operations on tabular data)</description>
 <macros>
 <import>macros.xml</import>
 </macros>
 <expand macro="requirements" />
 $header_in
 $header_out
 $need_sort
 $print_full_line
 $ignore_case
-#if str($grouping).strip()
+@FIELD_SEPARATOR@
---group '$grouping'
+#if str($grouping) != ''
+--group '$grouping'
 #end if
 #for $oper in $operations
 ${oper.op_name}
 ${oper.op_column}
 #end for
 < $in_file > '$out_file'
 ]]>
 </command>
 <expand macro="inputs_outputs">
-<param help="Example: to group by the first and fourth fields, use 1,4." label="Group by fields" name="grouping" type="text">
+<param argument="--group" name="grouping" type="text" label="Group by fields" help="Group consecutive rows with equal values in the chosen fields. If no columns are specified, each operation is performed on the entire input file. Comma-separated list of column indices, e.g. 1,5">
-<validator message="Invalid value in field. Allowed values are 0-9, space, comma." type="regex">^[0-9, ]*$</validator>
+<sanitizer invalid_char="">
+<valid initial="string.digits">
+<add value="," />
+</valid>
+<mapping initial="none">
+<add source=" " target=""/>
+</mapping>
+</sanitizer>
+<validator message="Invalid value in field. Allowed is a comma separated list of integer values or the empty string" type="regex">(^$)|(^\s*\d+\s*(,\s*\d+\s*)*$)</validator>
 </param>
-<param falsevalue="" help="--header-in" label="Input file has a header line" name="header_in" truevalue="--header-in" type="boolean" />
+<param argument="--sort" name="need_sort" type="boolean" truevalue="--sort" falsevalue="" label="Sort input" help="Input file must be sorted by the grouping columns. Enable this option to automatically sort the input."/>
-<param falsevalue="" help="--header-out" label="Print header line" name="header_out" truevalue="--header-out" type="boolean" />
+<param argument="--header-in" type="boolean" truevalue="--header-in" falsevalue="" label="Input file has a header line" />
-<param falsevalue="" help="--sort" label="Sort input" name="need_sort" truevalue="--sort" type="boolean" />
+<param argument="--header-out" type="boolean" truevalue="--header-out" falsevalue="" label="Print header line" />
-<param falsevalue="" help="--full" label="Print all fields from input file" name="print_full_line" truevalue="--full" type="boolean" />
+<param argument="--full" name="print_full_line" type="boolean" truevalue="--full" falsevalue="" label="Print all fields from input file" />
-<param falsevalue="" help="--ignore-case" label="Ignore case when grouping" name="ignore_case" truevalue="--ignore-case" type="boolean" />
+<param argument="--ignore-case" type="boolean" truevalue="--ignore-case" falsevalue="" label="Ignore case when grouping" />
-<repeat default="1" min="1" name="operations" title="Operation to perform on each group">
+<repeat name="operations" default="1" min="1" title="Operation to perform on each group">
 <param name="op_name" type="select" label="Type">
 <option value="count">count</option>
 <option value="sum">sum</option>
 <option value="min">minimum</option>
 <option value="max">maximum</option>
 <param name="op_name" value="sum" />
 <param name="op_column" value="3" />
 </repeat>
 <output file="group_compute_output.txt" name="out_file" ftype="tabular" />
 </test>
+<test>
+<param name="in_file" value="group_compute_input.txt" ftype="tsv" />
+<param name="grouping" value="2" />
+<param name="header_in" value="true" />
+<param name="header_out" value="true" />
+<param name="need_sort" value="true" />
+<param name="print_full_line" value="false" />
+<param name="ignore_case" value="false" />
+<repeat name="operations">
+<param name="op_name" value="sum" />
+<param name="op_column" value="3" />
+</repeat>
+<output file="group_compute_output.txt" name="out_file" ftype="tsv" />
+</test>
+<test>
+<param name="in_file" value="group_compute_input.csv" ftype="csv" />
+<param name="grouping" value="2" />
+<param name="header_in" value="true" />
+<param name="header_out" value="true" />
+<param name="need_sort" value="true" />
+<param name="print_full_line" value="false" />
+<param name="ignore_case" value="false" />
+<repeat name="operations">
+<param name="op_name" value="sum" />
+<param name="op_column" value="3" />
+</repeat>
+<output name="out_file" ftype="csv">
+<assert_contents>
+<has_n_lines n="7"/>
+<has_line line="Arts,1310"/>
+</assert_contents>
+</output>
+</test>
 </tests>
 <help>
 <![CDATA[
 @HELP_HEADER@
 - Find the average score of college students in a statistics course, grouped by their college major. The input file has three fields (Name, Major, Score) and a header line::
 Name        Major            Score
 Bryan       Arts             68
+Gabriel     Health-Medicine  100
 Isaiah      Arts             80
-Gabriel     Health-Medicine  100
 Tysza       Business         92
 Zackery     Engineering      54
 ...
 ...
-- Grouping the input by the second column (*Major*), and performing operations **mean** and **sample standard deviation** on the third column (*Score*), gives::
+- Grouping the input by the second column (*Major*), sorting the input, and performing operations **mean** and **sample standard deviation** on the third column (*Score*), gives::
 GroupBy(Major)     mean(Score)   sstdev(Score)
 Arts               68.9474       10.4215
 Business           87.3636       5.18214
 Engineering        66.5385       19.8814
 Health-Medicine    90.6154       9.22441
 Life-Sciences      55.3333       20.606
 Social-Sciences    60.2667       17.2273
+Note that the input needs sorting here, since the column used for grouping (*Major*) is not sorted.
 This sample file is available at http://www.gnu.org/software/datamash .
 **Example 2**

Mercurial > repos > iuc > datamash_ops

comparison datamash-ops.xml @ 3:419027d822d6 draft