Mercurial > repos > jjohnson > pandas_pivot_table
view pandas_pivot_table.xml @ 1:c02f59711eb6 draft
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit af9d36fa3efb1cf136a69e7ed1a5f06261f9b0d6-dirty"
| author | jjohnson | 
|---|---|
| date | Wed, 16 Dec 2020 16:13:51 +0000 | 
| parents | 621144f8dbe9 | 
| children | 6f05390deffa | 
line wrap: on
 line source
<!--
  Galaxy tool wrapper: runs pandas_pivot_table.py (shipped in the tool directory)
  on a tabular dataset and writes the pivoted table as a new tabular dataset.
-->
<tool id="pandas_pivot_table" name="Pivot Table" version="@VERSION@.0" python_template_version="3.5">
    <description>transform tabular data</description>
    <macros>
        <!-- The same token drives both the tool version and the pandas requirement version. -->
        <token name="@VERSION@">1.1.4</token>
    </macros>
    <requirements>
        <requirement type="package" version="@VERSION@">pandas</requirement>
    </requirements>
    <!--
      Cheetah command template. Each directive (#if / #elif / #end if) must sit on
      its own line. Every interpolated value that reaches the shell is single-quoted.
    -->
    <command detect_errors="exit_code"><![CDATA[
        '$__tool_directory__/pandas_pivot_table.py'
        #if $skiprows
            --skiprows $skiprows
        #end if
        #if $header.header_choice == 'prefix'
            --prefix '$header.prefix'
        #elif $header.header_choice == 'enter_names'
            --header '$header.names'
        #end if
        --index '$pvt_index'
        --columns '$pvt_columns'
        --values '$pvt_values'
        --aggfunc='$aggfunc'
        --input '$input'
        --output '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="tabular" label="Tabular table for pivot transformation"/>
        <!-- Where the column names come from: first line, user-entered list, or generated prefix + position. -->
        <conditional name="header">
            <param name="header_choice" type="select" label="Use as header">
                <option value="first_line">Dataset has column names in the first line</option>
                <option value="enter_names">Enter names for columns</option>
                <option value="prefix">Prefix + column position (0-indexed)</option>
            </param>
            <when value="first_line"/>
            <when value="enter_names">
                <param name="names" type="text" value="" label="Names for columns (no duplicates) separated by commas">
                    <validator type="regex" message="Column names separated by commas">^[A-Za-z]\w*(,[A-Za-z]\w*)*$</validator>
                </param>
            </when>
            <when value="prefix">
                <param name="prefix" type="text" value="C" label="Prefix before each column number (0-indexed)">
                    <validator type="regex" message="A-Za-z,A-Za-z0-9_">^[A-Za-z]\w*$</validator>
                </param>
            </when>
        </conditional>
        <param name="skiprows" type="integer" value="0" min="0" label="Skip table rows"/>
        <!-- Comma-separated column name lists; the validators reject whitespace inside names. -->
        <param name="pvt_index" type="text" value="" label="Pivot table index columns">
            <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
        </param>
        <param name="pvt_columns" type="text" value="" label="Pivot table columns to split into output columns">
            <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
        </param>
        <param name="pvt_values" type="text" value="" label="Pivot table value columns">
            <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
        </param>
        <param name="aggfunc" type="text" value="" label="Pivot table aggregate function">
            <help><![CDATA[A valid JSON string, e.g.:
                <ul>
                <li>A single function applied to each <i>value</i> column: <b>"min"</b></li>
                <li>An array of functions applied to each <i>value</i> column: <b>["min", "max", "mean", "std"]</b></li>
                <li>A dictionary of <i>value column : functions</i>: <b>{"A" : "sum", "B" : ["min", "max"]}</b></li>
                </ul>
            ]]></help>
            <!--
              Allow every printable character (JSON needs quotes, brackets and braces)
              except the single quote, which is mapped to __sq__ because the value is
              placed inside single quotes on the command line.
            -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                </valid>
                <mapping initial="none">
                    <add source="'" target="__sq__"/>
                </mapping>
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <data name="output" format="tabular"/>
    </outputs>
    <tests>
        <!-- Single aggregate function given as a JSON string. -->
        <test>
            <param name="input" ftype="tabular" value="table1.tsv"/>
            <conditional name="header">
                <param name="header_choice" value="first_line"/>
            </conditional>
            <param name="pvt_index" value="A"/>
            <param name="pvt_columns" value="C"/>
            <param name="pvt_values" value="D"/>
            <param name="aggfunc" value="&quot;max&quot;"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="bar\t7\t6"/>
                </assert_contents>
            </output>
        </test>
        <!-- JSON array of aggregate functions. -->
        <test>
            <param name="input" ftype="tabular" value="table1.tsv"/>
            <conditional name="header">
                <param name="header_choice" value="first_line"/>
            </conditional>
            <param name="pvt_index" value="A"/>
            <param name="pvt_columns" value="C"/>
            <param name="pvt_values" value="D"/>
            <param name="aggfunc" value="[&quot;min&quot;,&quot;max&quot;]"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="bar\t4\t5\t7\t6"/>
                </assert_contents>
            </output>
        </test>
        <!-- JSON dictionary mapping value columns to functions, with a two-column index. -->
        <test>
            <param name="input" ftype="tabular" value="table1.tsv"/>
            <conditional name="header">
                <param name="header_choice" value="first_line"/>
            </conditional>
            <param name="pvt_index" value="C,B"/>
            <param name="pvt_columns" value="A"/>
            <param name="pvt_values" value="D,E"/>
            <param name="aggfunc" value="{&quot;D&quot; : [&quot;min&quot;,&quot;sum&quot;], &quot;E&quot; : &quot;mean&quot;}"/>
            <output name="output">
                <assert_contents>
                    <has_text_matching expression="C\tB\tbar_min_D\tfoo_min_D\tbar_sum_D\tfoo_sum_D\tbar_mean_E\tfoo_mean_E"/>
                    <has_text_matching expression="large\tone\t4[.]\d+\t2[.]\d+\t4[.]\d+\t4[.]\d+\t6[.]\d+\t4[.]5\d+"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <!-- reStructuredText help; blank lines and literal-block indentation are significant. -->
    <help><![CDATA[
Perform a pivot table operation on a tabular dataset.

This uses the python pandas_ package to read_ a tabular file, perform a pivot_table_ operation, and write_ out the result as a tabular dataset.

.. _pandas: https://pandas.pydata.org/pandas-docs/stable/index.html
.. _pivot_table: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot_table.html
.. _read: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_table.html
.. _write: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html

**Examples**

**Input dataset**::

    A   B   C     D E
    foo one small 1 2
    foo one large 2 4
    foo one large 2 5
    foo two small 3 5
    foo two small 3 6
    bar one large 4 6
    bar one small 5 8
    bar two small 6 9
    bar two large 7 9

**Example 1**

Params: *Index: A Columns: C values: D aggfunc: "max"*

Output::

    A   large_D small_D
    bar 7       6
    foo 2       3

**Example 2**

Params: *Index: A Columns: C values: D aggfunc: ["min", "max"]*

Output::

    A   large_D_min small_D_min large_D_max small_D_max
    bar 4           5           7           6
    foo 2           1           2           3

**Example 3**

Params: *Index: A Columns: C values: D,E aggfunc: "mean"*

Output::

    A   large_D  small_D  large_E  small_E
    bar 5.500000 5.500000 7.500000 8.500000
    foo 2.000000 2.333333 4.500000 4.333333

**Example 4**

Params: *Index: A Columns: C values: D,E aggfunc: {"D" : [ "min","sum"], "E" : "mean"}*

Output::

    A   large_min_D small_min_D large_sum_D small_sum_D large_mean_E small_mean_E
    bar 4           5           11          11          7.500000     8.500000
    foo 2           1           4           7           4.500000     4.333333

**Example 5**

Params: *Index: B,C Columns: A values: E aggfunc: ["min","mean","max"]*

Output::

    B   C     bar_E_min foo_E_min bar_E_mean foo_E_mean bar_E_max foo_E_max
    one large 6.000000  4.000000  6.000000   4.500000   6.000000  5.000000
    one small 8.000000  2.000000  8.000000   2.000000   8.000000  2.000000
    two large 9.000000            9.000000              9.000000
    two small 9.000000  5.000000  9.000000   5.500000   9.000000  6.000000

    ]]></help>
    <citations>
        <citation type="doi">doi:10.5281/zenodo.4161697</citation>
    </citations>
</tool>
