comparison datamash-ops.xml @ 3:419027d822d6 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/datamash commit 206ac70388ea11b168366f1e30bb44157e371c6e"
author iuc
date Sun, 10 Apr 2022 11:41:19 +0000
parents 562f3c677828
children 746e8e4bf929
comparison
equal deleted inserted replaced
2:562f3c677828 3:419027d822d6
1 <?xml version="1.0"?> 1 <tool id="datamash_ops" name="Datamash" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <tool id="datamash_ops" name="Datamash" version="@WRAPPER_VERSION@">
3 <description>(operations on tabular data)</description> 2 <description>(operations on tabular data)</description>
4 <macros> 3 <macros>
5 <import>macros.xml</import> 4 <import>macros.xml</import>
6 </macros> 5 </macros>
7 <expand macro="requirements" /> 6 <expand macro="requirements" />
12 $header_in 11 $header_in
13 $header_out 12 $header_out
14 $need_sort 13 $need_sort
15 $print_full_line 14 $print_full_line
16 $ignore_case 15 $ignore_case
17 #if str($grouping).strip() 16 @FIELD_SEPARATOR@
18 --group '$grouping' 17 #if str($grouping) != ''
18 --group '$grouping'
19 #end if 19 #end if
20 #for $oper in $operations 20 #for $oper in $operations
21 ${oper.op_name} 21 ${oper.op_name}
22 ${oper.op_column} 22 ${oper.op_column}
23 #end for 23 #end for
24 < $in_file > '$out_file' 24 < $in_file > '$out_file'
25 ]]> 25 ]]>
26 </command> 26 </command>
27 <expand macro="inputs_outputs"> 27 <expand macro="inputs_outputs">
28 <param help="Example: to group by the first and fourth fields, use 1,4." label="Group by fields" name="grouping" type="text"> 28 <param argument="--group" name="grouping" type="text" label="Group by fields" help="Group consecutive rows with equal values in the chosen fields. If no columns are specified, each operation is performed on the entire input file. Comma-separated list of column indices, e.g. 1,5">
29 <validator message="Invalid value in field. Allowed values are 0-9, space, comma." type="regex">^[0-9, ]*$</validator> 29 <sanitizer invalid_char="">
30 <valid initial="string.digits">
31 <add value="," />
32 </valid>
33 <mapping initial="none">
34 <add source=" " target=""/>
35 </mapping>
36 </sanitizer>
37 <validator message="Invalid value in field. Allowed is a comma separated list of integer values or the empty string" type="regex">(^$)|(^\s*\d+\s*(,\s*\d+\s*)*$)</validator>
30 </param> 38 </param>
31 <param falsevalue="" help="--header-in" label="Input file has a header line" name="header_in" truevalue="--header-in" type="boolean" /> 39 <param argument="--sort" name="need_sort" type="boolean" truevalue="--sort" falsevalue="" label="Sort input" help="Input file must be sorted by the grouping columns. Enable this option to automatically sort the input."/>
32 <param falsevalue="" help="--header-out" label="Print header line" name="header_out" truevalue="--header-out" type="boolean" /> 40 <param argument="--header-in" type="boolean" truevalue="--header-in" falsevalue="" label="Input file has a header line" />
33 <param falsevalue="" help="--sort" label="Sort input" name="need_sort" truevalue="--sort" type="boolean" /> 41 <param argument="--header-out" type="boolean" truevalue="--header-out" falsevalue="" label="Print header line" />
34 <param falsevalue="" help="--full" label="Print all fields from input file" name="print_full_line" truevalue="--full" type="boolean" /> 42 <param argument="--full" name="print_full_line" type="boolean" truevalue="--full" falsevalue="" label="Print all fields from input file" />
35 <param falsevalue="" help="--ignore-case" label="Ignore case when grouping" name="ignore_case" truevalue="--ignore-case" type="boolean" /> 43 <param argument="--ignore-case" type="boolean" truevalue="--ignore-case" falsevalue="" label="Ignore case when grouping" />
36 <repeat default="1" min="1" name="operations" title="Operation to perform on each group"> 44 <repeat name="operations" default="1" min="1" title="Operation to perform on each group">
37 <param name="op_name" type="select" label="Type"> 45 <param name="op_name" type="select" label="Type">
38 <option value="count">count</option> 46 <option value="count">count</option>
39 <option value="sum">sum</option> 47 <option value="sum">sum</option>
40 <option value="min">minimum</option> 48 <option value="min">minimum</option>
41 <option value="max">maximum</option> 49 <option value="max">maximum</option>
80 <param name="op_name" value="sum" /> 88 <param name="op_name" value="sum" />
81 <param name="op_column" value="3" /> 89 <param name="op_column" value="3" />
82 </repeat> 90 </repeat>
83 <output file="group_compute_output.txt" name="out_file" ftype="tabular" /> 91 <output file="group_compute_output.txt" name="out_file" ftype="tabular" />
84 </test> 92 </test>
93 <test>
94 <param name="in_file" value="group_compute_input.txt" ftype="tsv" />
95 <param name="grouping" value="2" />
96 <param name="header_in" value="true" />
97 <param name="header_out" value="true" />
98 <param name="need_sort" value="true" />
99 <param name="print_full_line" value="false" />
100 <param name="ignore_case" value="false" />
101 <repeat name="operations">
102 <param name="op_name" value="sum" />
103 <param name="op_column" value="3" />
104 </repeat>
105 <output file="group_compute_output.txt" name="out_file" ftype="tsv" />
106 </test>
107 <test>
108 <param name="in_file" value="group_compute_input.csv" ftype="csv" />
109 <param name="grouping" value="2" />
110 <param name="header_in" value="true" />
111 <param name="header_out" value="true" />
112 <param name="need_sort" value="true" />
113 <param name="print_full_line" value="false" />
114 <param name="ignore_case" value="false" />
115 <repeat name="operations">
116 <param name="op_name" value="sum" />
117 <param name="op_column" value="3" />
118 </repeat>
119 <output name="out_file" ftype="csv">
120 <assert_contents>
121 <has_n_lines n="7"/>
122 <has_line line="Arts,1310"/>
123 </assert_contents>
124 </output>
125 </test>
85 </tests> 126 </tests>
86 <help> 127 <help>
87 <![CDATA[ 128 <![CDATA[
88 @HELP_HEADER@ 129 @HELP_HEADER@
89 130
97 138
98 - Find the average score in a statistics course for college students, grouped by their college major. The input file has three fields (Name,Major,Score) and a header line:: 139 - Find the average score in a statistics course for college students, grouped by their college major. The input file has three fields (Name,Major,Score) and a header line::
99 140
100 Name Major Score 141 Name Major Score
101 Bryan Arts 68 142 Bryan Arts 68
143 Gabriel Health-Medicine 100
102 Isaiah Arts 80 144 Isaiah Arts 80
103 Gabriel Health-Medicine 100
104 Tysza Business 92 145 Tysza Business 92
105 Zackery Engineering 54 146 Zackery Engineering 54
106 ... 147 ...
107 ... 148 ...
108 149
109 - Grouping the input by the second column (*Major*), and performing operations **mean** and **sample standard deviation** on the third column (*Score*), gives:: 150 - Grouping the input by the second column (*Major*), sorting the input, and performing operations **mean** and **sample standard deviation** on the third column (*Score*), gives::
110 151
111 GroupBy(Major) mean(Score) sstdev(Score) 152 GroupBy(Major) mean(Score) sstdev(Score)
112 Arts 68.9474 10.4215 153 Arts 68.9474 10.4215
113 Business 87.3636 5.18214 154 Business 87.3636 5.18214
114 Engineering 66.5385 19.8814 155 Engineering 66.5385 19.8814
115 Health-Medicine 90.6154 9.22441 156 Health-Medicine 90.6154 9.22441
116 Life-Sciences 55.3333 20.606 157 Life-Sciences 55.3333 20.606
117 Social-Sciences 60.2667 17.2273 158 Social-Sciences 60.2667 17.2273
159
160 Note that the input needs sorting here, since the column used for grouping (*Major*) is not sorted.
118 161
119 This sample file is available at http://www.gnu.org/software/datamash . 162 This sample file is available at http://www.gnu.org/software/datamash .
120 163
121 **Example 2** 164 **Example 2**
122 165