Mercurial > repos > jjohnson > pandas_pivot_table
comparison pandas_pivot_table.xml @ 0:621144f8dbe9 draft
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit 80684939b0bf75abb5cc70a9878054c1f734b651-dirty"
author | jjohnson |
---|---|
date | Wed, 02 Dec 2020 22:59:57 +0000 |
parents | |
children | 6f05390deffa |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:621144f8dbe9 |
---|---|
1 <tool id="pandas_pivot_table" name="Pivot Table" version="@VERSION@.0" python_template_version="3.5"> | |
2 <description>transform tabular data</description> | |
3 <macros> | |
4 <token name="@VERSION@">1.1.4</token> | |
5 </macros> | |
6 <requirements> | |
7 <requirement type="package" version="@VERSION@">pandas</requirement> | |
8 </requirements> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 $__tool_directory__/pandas_pivot_table.py | |
11 #if $skiprows | |
12 --skiprows $skiprows | |
13 #end if | |
14 #if $header.header_choice == 'prefix' | |
15 --prefix $header.prefix | |
16 #elif $header.header_choice == 'enter_names' | |
17 --header $header.names | |
18 #end if | |
19 --index '$pvt_index' | |
20 --columns '$pvt_columns' | |
21 --values '$pvt_values' | |
22 --aggfunc='$aggfunc' | |
23 --input '$input' | |
24 --output '$output' | |
25 ]]></command> | |
26 <inputs> | |
27 <param name="input" type="data" format="tabular" label="Tabular table for pivot transformation"/> | |
28 <conditional name="header"> | |
29 <param name="header_choice" type="select" label="Use as header"> | |
30 <option value="first_line">Dataset has column names in the first line</option> | |
31 <option value="enter_names">Enter names for columns</option> | |
32 <option value="prefix">Prefix + column position (0-indexed)</option> | |
33 </param> | |
34 <when value="first_line"/> | |
35 <when value="enter_names"> | |
36 <param name="names" type="text" value="" label="Names for columns (no duplicates) separated by commas"> | |
37 <validator type="regex" message="Column names separated by commas">^[A-Za-z]\w*(,[A-Za-z]\w*)*$</validator> | |
38 </param> | |
39 </when> | |
40 <when value="prefix"> | |
41 <param name="prefix" type="text" value="C" label="Prefix before each column number (0-indexed)"> | |
42 <validator type="regex" message="A-Za-z,A-Za-z0-9_">^[A-Za-z]\w*$</validator> | |
43 </param> | |
44 </when> | |
45 </conditional> | |
46 <param name="skiprows" type="integer" value="0" min="0" label="Skip table rows"/> | |
47 <param name="pvt_index" type="text" value="" label="Pivot table index columns"> | |
48 <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator> | |
49 </param> | |
50 <param name="pvt_columns" type="text" value="" label="Pivot table columns to split into output columns"> | |
51 <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator> | |
52 </param> | |
53 <param name="pvt_values" type="text" value="" label="Pivot table value columns"> | |
54 <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator> | |
55 </param> | |
56 <param name="aggfunc" type="text" value="" label="Pivot table aggregate function"> | |
57 <help><![CDATA[A valid JSON string, e.g.: | |
58 <ul> | |
59 <li>A single function applied to each <i>value</i> column: <b>"min"</b></li> | |
60 <li>An array of functions applied to each <i>value</i> column: <b>["min", "max", "mean", "std"]</b></li> | |
61 <li>A dictionary of <i>value column : functions</i>: <b>{"A" : "sum", "B" : ["min", "max"]}</b></li> | |
62 </ul> | |
63 ]]></help> | |
64 <sanitizer> | |
65 <valid initial="string.printable"> | |
66 <remove value="'"/> | |
67 </valid> | |
68 <mapping initial="none"> | |
69 <add source="'" target="__sq__"/> | |
70 </mapping> | |
71 </sanitizer> | |
72 </param> | |
73 </inputs> | |
74 <outputs> | |
75 <data name="output" format="tabular"/> | |
76 </outputs> | |
77 <tests> | |
78 <test> | |
79 <param name="input" ftype="tabular" value="table1.tsv"/> | |
80 <conditional name="header"> | |
81 <param name="header_choice" value="first_line"/> | |
82 </conditional> | |
83 <param name="pvt_index" value="A" /> | |
84 <param name="pvt_columns" value="C"/> | |
85 <param name="pvt_values" value="D"/> | |
86 <param name="aggfunc" value="&quot;max&quot;"/> | |
87 <output name="output"> | |
88 <assert_contents> | |
89 <has_text_matching expression="bar\t7\t6" /> | |
90 </assert_contents> | |
91 </output> | |
92 </test> | |
93 <test> | |
94 <param name="input" ftype="tabular" value="table1.tsv"/> | |
95 <conditional name="header"> | |
96 <param name="header_choice" value="first_line"/> | |
97 </conditional> | |
98 <param name="pvt_index" value="A" /> | |
99 <param name="pvt_columns" value="C"/> | |
100 <param name="pvt_values" value="D"/> | |
101 <param name="aggfunc" value="[&quot;min&quot;,&quot;max&quot;]"/> | |
102 <output name="output"> | |
103 <assert_contents> | |
104 <has_text_matching expression="bar\t4\t5\t7\t6" /> | |
105 </assert_contents> | |
106 </output> | |
107 </test> | |
108 <test> | |
109 <param name="input" ftype="tabular" value="table1.tsv"/> | |
110 <conditional name="header"> | |
111 <param name="header_choice" value="first_line"/> | |
112 </conditional> | |
113 <param name="pvt_index" value="C,B" /> | |
114 <param name="pvt_columns" value="A"/> | |
115 <param name="pvt_values" value="D,E"/> | |
116 <param name="aggfunc" value="{&quot;D&quot; : [&quot;min&quot;,&quot;sum&quot;], &quot;E&quot; : &quot;mean&quot;}"/> | |
117 <output name="output"> | |
118 <assert_contents> | |
119 <has_text_matching expression="C\tB\tbar_min_D\tfoo_min_D\tbar_sum_D\tfoo_sum_D\tbar_mean_E\tfoo_mean_E"/> | |
120 <has_text_matching expression="large\tone\t4[.]\d+\t2[.]\d+\t4[.]\d+\t4[.]\d+\t6[.]\d+\t4[.]5\d+"/> | |
121 </assert_contents> | |
122 </output> | |
123 </test> | |
124 | |
125 </tests> | |
126 <help><![CDATA[ | |
127 Perform a pivot table operation on a tabular dataset. | |
128 | |
129 This uses the python pandas_ package to read_ a tabular file, perform a pivot_table_ operation, and write_ out the result as a tabular dataset. | |
130 | |
131 .. _pandas: https://pandas.pydata.org/pandas-docs/stable/index.html | |
132 .. _pivot_table: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot_table.html | |
133 .. _read: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_table.html | |
134 .. _write: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html | |
135 | |
136 **Examples** | |
137 | |
138 **Input dataset**:: | |
139 | |
140 A B C D E | |
141 foo one small 1 2 | |
142 foo one large 2 4 | |
143 foo one large 2 5 | |
144 foo two small 3 5 | |
145 foo two small 3 6 | |
146 bar one large 4 6 | |
147 bar one small 5 8 | |
148 bar two small 6 9 | |
149 bar two large 7 9 | |
150 | |
151 | |
152 **Example 1** | |
153 | |
154 Params: *Index: A Columns: C values: D aggfunc: "max"* | |
155 | |
156 Output:: | |
157 | |
158 A large_D small_D | |
159 bar 7 6 | |
160 foo 2 3 | |
161 | |
162 | |
163 **Example 2** | |
164 | |
165 Params: *Index: A Columns: C values: D aggfunc: ["min", "max"]* | |
166 | |
167 Output:: | |
168 | |
169 A large_D_min small_D_min large_D_max small_D_max | |
170 bar 4 5 7 6 | |
171 foo 2 1 2 3 | |
172 | |
173 | |
174 **Example 3** | |
175 | |
176 Params: *Index: A Columns: C values: D,E aggfunc: "mean"* | |
177 | |
178 Output:: | |
179 | |
180 A large_D small_D large_E small_E | |
181 bar 5.500000 5.500000 7.500000 8.500000 | |
182 foo 2.000000 2.333333 4.500000 4.333333 | |
183 | |
184 | |
185 **Example 4** | |
186 | |
187 Params: *Index: A Columns: C values: D,E aggfunc: {"D" : [ "min","sum"], "E" : "mean"}* | |
188 | |
189 Output:: | |
190 | |
191 A large_min_D small_min_D large_sum_D small_sum_D large_mean_E small_mean_E | |
192 bar 4 5 11 11 7.500000 8.500000 | |
193 foo 2 1 4 7 4.500000 4.333333 | |
194 | |
195 | |
196 **Example 5** | |
197 | |
198 Params: *Index: B,C Columns: A values: E aggfunc: ["min","mean","max"]* | |
199 | |
200 Output:: | |
201 | |
202 B C bar_E_min foo_E_min bar_E_mean foo_E_mean bar_E_max foo_E_max | |
203 one large 6.000000 4.000000 6.000000 4.500000 6.000000 5.000000 | |
204 one small 8.000000 2.000000 8.000000 2.000000 8.000000 2.000000 | |
205 two large 9.000000 9.000000 9.000000 | |
206 two small 9.000000 5.000000 9.000000 5.500000 9.000000 6.000000 | |
207 | |
208 ]]></help> | |
209 <citations> | |
210 <citation type="doi">doi:10.5281/zenodo.4161697</citation> | |
211 </citations> | |
212 </tool> |