pandas_pivot_table: pandas_pivot

comparison pandas_pivot_table.xml @ 0:621144f8dbe9 draft

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit 80684939b0bf75abb5cc70a9878054c1f734b651-dirty"

author	jjohnson
date	Wed, 02 Dec 2020 22:59:57 +0000
parents
children	6f05390deffa

comparison

equal deleted inserted replaced

--1:000000000000
+:621144f8dbe9
+<tool id="pandas_pivot_table" name="Pivot Table" version="@VERSION@.0" python_template_version="3.5">
+<description>transform tabular data</description>
+<macros>
+<token name="@VERSION@">1.1.4</token>
+</macros>
+<requirements>
+<requirement type="package" version="@VERSION@">pandas</requirement>
+</requirements>
+<command detect_errors="exit_code"><![CDATA[
+$__tool_directory__/pandas_pivot_table.py
+#if $skiprows
+--skiprows $skiprows
+#end if
+#if $header.header_choice == 'prefix'
+--prefix $header.prefix
+#elif $header.header_choice == 'enter_names'
+--header $header.names
+#end if
+--index '$pvt_index'
+--columns '$pvt_columns'
+--values '$pvt_values'
+--aggfunc='$aggfunc'
+--input '$input'
+--output '$output'
+]]></command>
+<inputs>
+<param name="input" type="data" format="tabular" label="Tabular table for pivot transformation"/>
+<conditional name="header">
+<param name="header_choice" type="select" label="Use as header">
+<option value="first_line">Dataset has column names in the first line</option>
+<option value="enter_names">Enter names for columns</option>
+<option value="prefix">Prefix + column position (0-indexed)</option>
+</param>
+<when value="first_line"/>
+<when value="enter_names">
+<param name="names" type="text" value="" label="Names for columns (no duplicates) separated by commas">
+<validator  type="regex" message="Column names separated by commas">^[A-Za-z]\w*(,[A-Za-z]\w*)*$</validator>
+</param>
+</when>
+<when value="prefix">
+<param name="prefix" type="text" value="C" label="Prefix before each column number (0-indexed)">
+<validator  type="regex" message="A-Za-z,A-Za-z0-9_">^[A-Za-z]\w*$</validator>
+</param>
+</when>
+</conditional>
+	<param name="skiprows" type="integer" value="0" min="0" label="Skip table rows"/>
+	<param name="pvt_index" type="text" value="" label="Pivot table index columns">
+<validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
+</param>
+	<param name="pvt_columns" type="text" value="" label="Pivot table columns to split into output columns">
+<validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
+</param>
+	<param name="pvt_values" type="text" value="" label="Pivot table value  columns">
+<validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
+</param>
+	<param name="aggfunc" type="text" value="" label="Pivot table aggregate function">
+<help><![CDATA[A valid JSON string, e.g.:
+<ul>
+<li>A single function applied to each <i>value</i> column:  <b>"min"</b></li>
+<li>An array of functions applied to each <i>value</i> column:  <b>["min", "max", "mean", "std"]</b></li>
+<li>A dictionary of <i>value column : functions</i>: <b>{"A" : "sum", "B" : ["min", "max"]}</b></li>
+</ul>
+]]></help>
+<sanitizer>
+<valid initial="string.printable">
+<remove value="&apos;"/>
+</valid>
+<mapping initial="none">
+<add source="&apos;" target="__sq__"/>
+</mapping>
+</sanitizer>
+</param>
+</inputs>
+<outputs>
+<data name="output" format="tabular"/>
+</outputs>
+<tests>
+<test>
+<param name="input" ftype="tabular" value="table1.tsv"/>
+<conditional name="header">
+<param name="header_choice" value="first_line"/>
+</conditional>
+	    <param name="pvt_index" value="A" />
+	    <param name="pvt_columns" value="C"/>
+	    <param name="pvt_values" value="D"/>
+	    <param name="aggfunc" value="&quot;max&quot;"/>
+<output name="output">
+<assert_contents>
+<has_text_matching expression="bar\t7\t6" />
+</assert_contents>
+</output>
+</test>
+<test>
+<param name="input" ftype="tabular" value="table1.tsv"/>
+<conditional name="header">
+<param name="header_choice" value="first_line"/>
+</conditional>
+	    <param name="pvt_index" value="A" />
+	    <param name="pvt_columns" value="C"/>
+	    <param name="pvt_values" value="D"/>
+	    <param name="aggfunc" value="[&quot;min&quot;,&quot;max&quot;]"/>
+<output name="output">
+<assert_contents>
+<has_text_matching expression="bar\t4\t5\t7\t6" />
+</assert_contents>
+</output>
+</test>
+<test>
+<param name="input" ftype="tabular" value="table1.tsv"/>
+<conditional name="header">
+<param name="header_choice" value="first_line"/>
+</conditional>
+	    <param name="pvt_index" value="C,B" />
+	    <param name="pvt_columns" value="A"/>
+	    <param name="pvt_values" value="D,E"/>
+	    <param name="aggfunc" value="{&quot;D&quot; : [&quot;min&quot;,&quot;sum&quot;], &quot;E&quot; : &quot;mean&quot;}"/>
+<output name="output">
+<assert_contents>
+<has_text_matching expression="C\tB\tbar_min_D\tfoo_min_D\tbar_sum_D\tfoo_sum_D\tbar_mean_E\tfoo_mean_E"/>
+<has_text_matching expression="large\tone\t4[.]\d+\t2[.]\d+\t4[.]\d+\t4[.]\d+\t6[.]\d+\t4[.]5\d+"/>
+</assert_contents>
+</output>
+</test>
+</tests>
+<help><![CDATA[
+Perform a pivot table operation on a tabular dataset.
+This uses the python pandas_ package to read_ a tabular file, perform a pivot_table_ operation, and write_ out the result as a tabular dataset.
+.. _pandas: https://pandas.pydata.org/pandas-docs/stable/index.html
+.. _pivot_table: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot_table.html
+.. _read: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_table.html
+.. _write: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html
+**Examples**
+**Input dataset**::
+A   B   C     D  E
+foo one small 1  2
+foo one large 2  4
+foo one large 2  5
+foo two small 3  5
+foo two small 3  6
+bar one large 4  6
+bar one small 5  8
+bar two small 6  9
+bar two large 7  9
+**Example 1**
+Params:  *Index: A  Columns: C  values: D  aggfunc: "max"*
+Output::
+A   large_D	 small_D
+bar 7        6
+foo 2        3
+**Example 2**
+Params:  *Index: A  Columns: C  values: D  aggfunc: ["min", "max"]*
+Output::
+A   large_D_min small_D_min large_D_max small_D_max
+bar 4           5           7           6
+foo 2           1           2           3
+**Example 3**
+Params: *Index: A  Columns: C  values: D,E  aggfunc: "mean"*
+Output::
+A   large_D	 small_D  large_E  small_E
+bar 5.500000 5.500000 7.500000 8.500000
+foo 2.000000 2.333333 4.500000 4.333333
+**Example 4**
+Params:  *Index: A  Columns: C  values: D,E  aggfunc: {"D" : [ "min","sum"], "E" : "mean"}*
+Output::
+A   large_min_D small_min_D large_sum_D small_sum_D large_mean_E small_mean_E
+bar 4           5           11          11          7.500000     8.500000
+foo 2           1           4           7           4.500000     4.333333
+**Example 5**
+Params:  *Index: B,C  Columns: A  values: E  aggfunc: ["min","mean","max"]*
+Output::
+B   C     bar_E_min foo_E_min bar_E_mean foo_E_mean bar_E_max foo_E_max
+one large 6.000000  4.000000  6.000000   4.500000   6.000000  5.000000
+one small 8.000000  2.000000  8.000000   2.000000   8.000000  2.000000
+two large 9.000000            9.000000              9.000000
+two small 9.000000  5.000000  9.000000   5.500000   9.000000  6.000000
+]]></help>
+<citations>
+<citation type="doi">doi:10.5281/zenodo.4161697</citation>
+</citations>
+</tool>

Mercurial > repos > jjohnson > pandas_pivot_table

comparison pandas_pivot_table.xml @ 0:621144f8dbe9 draft