comparison pandas_pivot_table.xml @ 0:621144f8dbe9 draft

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit 80684939b0bf75abb5cc70a9878054c1f734b651-dirty"
author jjohnson
date Wed, 02 Dec 2020 22:59:57 +0000
parents
children 6f05390deffa
comparison
equal deleted inserted replaced
-1:000000000000 0:621144f8dbe9
<tool id="pandas_pivot_table" name="Pivot Table" version="@VERSION@.0" python_template_version="3.5">
    <description>transform tabular data</description>
    <!-- Galaxy wrapper around pandas_pivot_table.py: hands one tabular dataset plus the
         index / columns / values / aggregate-function settings to the script and
         collects a single tabular output. -->
    <macros>
        <!-- One token drives both the pandas requirement version and the tool version prefix. -->
        <token name="@VERSION@">1.1.4</token>
    </macros>
    <requirements>
        <requirement type="package" version="@VERSION@">pandas</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Invoke the script through the interpreter rather than relying on its
        ## executable bit, and quote the path so a tool directory containing
        ## spaces stays a single shell word.
        python '$__tool_directory__/pandas_pivot_table.py'
        ## skiprows defaults to 0, which is falsy, so the option is only emitted
        ## when the user asks for rows to be skipped.
        #if $skiprows
            --skiprows $skiprows
        #end if
        ## Text parameters are single-quoted, matching the arguments below.
        #if $header.header_choice == 'prefix'
            --prefix '$header.prefix'
        #elif $header.header_choice == 'enter_names'
            --header '$header.names'
        #end if
        --index '$pvt_index'
        --columns '$pvt_columns'
        --values '$pvt_values'
        --aggfunc='$aggfunc'
        --input '$input'
        --output '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="tabular" label="Tabular table for pivot transformation"/>
        <!-- Source of the column names: first line of the dataset, an explicit list,
             or a prefix combined with the column position. -->
        <conditional name="header">
            <param name="header_choice" type="select" label="Use as header">
                <option value="first_line">Dataset has column names in the first line</option>
                <option value="enter_names">Enter names for columns</option>
                <option value="prefix">Prefix + column position (0-indexed)</option>
            </param>
            <when value="first_line"/>
            <when value="enter_names">
                <param name="names" type="text" value="" label="Names for columns (no duplicates) separated by commas">
                    <validator type="regex" message="Column names separated by commas">^[A-Za-z]\w*(,[A-Za-z]\w*)*$</validator>
                </param>
            </when>
            <when value="prefix">
                <param name="prefix" type="text" value="C" label="Prefix before each column number (0-indexed)">
                    <validator type="regex" message="Prefix must start with a letter and contain only letters, digits or underscores">^[A-Za-z]\w*$</validator>
                </param>
            </when>
        </conditional>
        <param name="skiprows" type="integer" value="0" min="0" label="Skip table rows"/>
        <!-- NOTE(review): in the three column-list validators below, \S also matches a comma,
             so the pattern accepts empty names such as "A,,B" or a trailing comma.
             Confirm how the script treats those before tightening the expression. -->
        <param name="pvt_index" type="text" value="" label="Pivot table index columns">
            <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
        </param>
        <param name="pvt_columns" type="text" value="" label="Pivot table columns to split into output columns">
            <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
        </param>
        <param name="pvt_values" type="text" value="" label="Pivot table value columns">
            <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
        </param>
        <param name="aggfunc" type="text" value="" label="Pivot table aggregate function">
            <help><![CDATA[A valid JSON string, e.g.:
<ul>
<li>A single function applied to each <i>value</i> column: <b>"min"</b></li>
<li>An array of functions applied to each <i>value</i> column: <b>["min", "max", "mean", "std"]</b></li>
<li>A dictionary of <i>value column : functions</i>: <b>{"A" : "sum", "B" : ["min", "max"]}</b></li>
</ul>
]]></help>
            <!-- JSON needs double quotes, brackets and braces, so every printable character is
                 allowed except the single quote. A single quote is rewritten to __sq__ so the
                 value cannot close the single-quoted argument in the command template. -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="&apos;"/>
                </valid>
                <mapping initial="none">
                    <add source="&apos;" target="__sq__"/>
                </mapping>
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <data name="output" format="tabular"/>
    </outputs>
    <tests>
        <!-- aggfunc given as a single function name. -->
        <test>
            <param name="input" ftype="tabular" value="table1.tsv"/>
            <conditional name="header">
                <param name="header_choice" value="first_line"/>
            </conditional>
            <param name="pvt_index" value="A" />
            <param name="pvt_columns" value="C"/>
            <param name="pvt_values" value="D"/>
            <param name="aggfunc" value="&quot;max&quot;"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="bar\t7\t6" />
                </assert_contents>
            </output>
        </test>
        <!-- aggfunc given as a list of function names. -->
        <test>
            <param name="input" ftype="tabular" value="table1.tsv"/>
            <conditional name="header">
                <param name="header_choice" value="first_line"/>
            </conditional>
            <param name="pvt_index" value="A" />
            <param name="pvt_columns" value="C"/>
            <param name="pvt_values" value="D"/>
            <param name="aggfunc" value="[&quot;min&quot;,&quot;max&quot;]"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="bar\t4\t5\t7\t6" />
                </assert_contents>
            </output>
        </test>
        <!-- aggfunc given as a dictionary of per-column functions, with a two-column index. -->
        <test>
            <param name="input" ftype="tabular" value="table1.tsv"/>
            <conditional name="header">
                <param name="header_choice" value="first_line"/>
            </conditional>
            <param name="pvt_index" value="C,B" />
            <param name="pvt_columns" value="A"/>
            <param name="pvt_values" value="D,E"/>
            <param name="aggfunc" value="{&quot;D&quot; : [&quot;min&quot;,&quot;sum&quot;], &quot;E&quot; : &quot;mean&quot;}"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="C\tB\tbar_min_D\tfoo_min_D\tbar_sum_D\tfoo_sum_D\tbar_mean_E\tfoo_mean_E"/>
                    <has_text_matching expression="large\tone\t4[.]\d+\t2[.]\d+\t4[.]\d+\t4[.]\d+\t6[.]\d+\t4[.]5\d+"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Perform a pivot table operation on a tabular dataset.

This uses the python pandas_ package to read_ a tabular file, perform a pivot_table_ operation, and write_ out the result as a tabular dataset.

.. _pandas: https://pandas.pydata.org/pandas-docs/stable/index.html
.. _pivot_table: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot_table.html
.. _read: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_table.html
.. _write: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html

**Examples**

**Input dataset**::

    A    B    C      D  E
    foo  one  small  1  2
    foo  one  large  2  4
    foo  one  large  2  5
    foo  two  small  3  5
    foo  two  small  3  6
    bar  one  large  4  6
    bar  one  small  5  8
    bar  two  small  6  9
    bar  two  large  7  9


**Example 1**

Params: *Index: A Columns: C values: D aggfunc: "max"*

Output::

    A    large_D  small_D
    bar  7        6
    foo  2        3


**Example 2**

Params: *Index: A Columns: C values: D aggfunc: ["min", "max"]*

Output::

    A    large_D_min  small_D_min  large_D_max  small_D_max
    bar  4            5            7            6
    foo  2            1            2            3


**Example 3**

Params: *Index: A Columns: C values: D,E aggfunc: "mean"*

Output::

    A    large_D   small_D   large_E   small_E
    bar  5.500000  5.500000  7.500000  8.500000
    foo  2.000000  2.333333  4.500000  4.333333


**Example 4**

Params: *Index: A Columns: C values: D,E aggfunc: {"D" : [ "min","sum"], "E" : "mean"}*

Output::

    A    large_min_D  small_min_D  large_sum_D  small_sum_D  large_mean_E  small_mean_E
    bar  4            5            11           11           7.500000      8.500000
    foo  2            1            4            7            4.500000      4.333333


**Example 5**

Params: *Index: B,C Columns: A values: E aggfunc: ["min","mean","max"]*

Output::

    B    C      bar_E_min  foo_E_min  bar_E_mean  foo_E_mean  bar_E_max  foo_E_max
    one  large  6.000000   4.000000   6.000000    4.500000    6.000000   5.000000
    one  small  8.000000   2.000000   8.000000    2.000000    8.000000   2.000000
    two  large  9.000000              9.000000                9.000000
    two  small  9.000000   5.000000   9.000000    5.500000    9.000000   6.000000

    ]]></help>
    <citations>
        <citation type="doi">doi:10.5281/zenodo.4161697</citation>
    </citations>
</tool>