Repository 'table_compute'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/table_compute

Changeset 4:93a3ce78ce55 (2021-04-20)
Previous changeset 3:60ff16842fcd (2019-10-18) Next changeset 5:3bf5661c0280 (2022-11-14)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/table_compute commit d741508e5ed912cdeee4f67ec8451b6e6865363c"
modified:
allowed_functions.xml
scripts/table_compute.py
table_compute.xml
added:
test-data/pivot.tsv
test-data/table1.tsv
b
diff -r 60ff16842fcd -r 93a3ce78ce55 allowed_functions.xml
--- a/allowed_functions.xml Fri Oct 18 06:22:51 2019 -0400
+++ b/allowed_functions.xml Tue Apr 20 15:46:10 2021 +0000
b
@@ -5,7 +5,7 @@
          This file is parsed by the python scripts to reduce
          duplicate definitions of allowed functions -->
     <macro name="select_vectorops" >
-        <option value="min" selected="true">Minimum</option>
+        <option value="min" >Minimum</option>
         <option value="max">Maximum</option>
         <option value="sum">Sum</option>
         <option value="count">Number of Non-NA Values</option>
b
diff -r 60ff16842fcd -r 93a3ce78ce55 scripts/table_compute.py
--- a/scripts/table_compute.py Fri Oct 18 06:22:51 2019 -0400
+++ b/scripts/table_compute.py Tue Apr 20 15:46:10 2021 +0000
[
@@ -314,12 +314,20 @@
             out_table = pd.melt(data, id_vars=melt_ids, value_vars=melt_values)
         elif general_mode == "pivot":
             pivot_index = params["PIVOT"]["pivot_index"]
-            pivot_column = params["PIVOT"]["pivot_column"]
+            pivot_column = params["PIVOT"]["pivot_columns"]
             pivot_values = params["PIVOT"]["pivot_values"]
+            pivot_aggfunc = params["PIVOT"]["pivot_aggfunc"]
 
-            out_table = data.pivot(
-                index=pivot_index, columns=pivot_column, values=pivot_values
-            )
+            if not(pivot_aggfunc):
+                out_table = data.pivot(
+                    index=pivot_index, columns=pivot_column, values=pivot_values
+                )
+            else:
+                out_table = data.pivot_table(
+                    index=pivot_index, columns=pivot_column, values=pivot_values,
+                    aggfunc=pivot_aggfunc
+                )
+
         elif general_mode == "custom":
             custom_func = params["fulltable_customop"]
 
b
diff -r 60ff16842fcd -r 93a3ce78ce55 table_compute.xml
--- a/table_compute.xml Fri Oct 18 06:22:51 2019 -0400
+++ b/table_compute.xml Tue Apr 20 15:46:10 2021 +0000
[
b'@@ -1,7 +1,8 @@\n-<tool id="table_compute" name="Table Compute" version="@VERSION@">\n+<tool id="table_compute" name="Table Compute" version="@VERSION@+galaxy@WRAPPER_VERSION@">\n     <description>computes operations on table data</description>\n     <macros>\n-        <token name="@VERSION@">0.9.2</token>\n+        <token name="@VERSION@">1.2.4</token>\n+        <token name="@WRAPPER_VERSION@">0</token>\n         <token name="@COPEN@"><![CDATA[<code>]]></token>\n         <token name="@CCLOSE@"><![CDATA[</code>]]></token>\n         <import>allowed_functions.xml</import>\n@@ -92,8 +93,8 @@\n         </macro>\n     </macros>\n     <requirements>\n-        <requirement type="package" version="0.25">pandas</requirement>\n-        <requirement type="package" version="1.17">numpy</requirement>\n+        <requirement type="package" version="1.2.4">pandas</requirement>\n+        <requirement type="package" version="1.19.2">numpy</requirement>\n     </requirements>\n \n     <version_command><![CDATA[\n@@ -260,27 +261,68 @@\n             "melt_values": $melt_values,\n         },\n       #elif str($singtabop.user.general.use) == \'pivot\':\n-        #set $pivot_index = str($singtabop.user.general.index).strip()\n-        #if $pivot_index:\n-          #set $pivot_index = "\'" + $pivot_index + "\'"\n-        #else:\n+        #set $pivot_index = None\n+        #set $pivot_columns = None\n+        #set $pivot_values = None\n+        #set $pivot_aggfunc = None\n+\n+        #if str($singtabop.user.general.index).strip()\n+          #set $pivot_index = [i.strip() for i in str($singtabop.user.general.index).split(\',\')]\n+          #if len($pivot_index)==1\n+            #set $pivot_index = "\'"+$pivot_index[0]+"\'"\n+          #end if\n+        #else\n           #set $pivot_index = \'None\'\n         #end if\n-        #set $pivot_column = "\'" + str($singtabop.user.general.column).strip() + "\'"\n-        #if str($singtabop.user.general.values).strip():\n+        #if str($singtabop.user.general.column).strip()\n+          #set $pivot_columns = [i.strip() for i in str($singtabop.user.general.column).split(\',\')]\n+          #if len($pivot_columns)==1\n+            #set $pivot_columns = "\'"+$pivot_columns[0]+"\'"\n+          #end if\n+        #else\n+          #set $pivot_columns = \'None\'\n+        #end if\n+        #if str($singtabop.user.general.values).strip()\n           #set $pivot_values = [i.strip() for i in str($singtabop.user.general.values).split(\',\')]\n+          #if len($pivot_values)==1\n+            #set $pivot_values = "\'"+$pivot_values[0]+"\'"\n+          #end if\n         #else\n           #set $pivot_values = \'None\'\n         #end if\n+\n+        ## The aggfunc is either a - column : [array] map\n+        ## or just an [array]\n+        #if str($singtabop.user.general.aggfunc).strip() == \'None\'\n+          #set $pivot_aggfunc = \'None\'\n+        #end if\n+        #if not($pivot_aggfunc)\n+          #set $pivot_aggfunc = [i.strip() for i in str($singtabop.user.general.aggfunc).split(\',\')]\n+          #if len($pivot_aggfunc)==1\n+            #set $pivot_aggfunc = "\'"+$pivot_aggfunc[0]+"\'"\n+          #end if\n+        #end if\n+\n         "PIVOT": {\n             "pivot_index": $pivot_index,\n-            "pivot_column": $pivot_column,\n+            "pivot_columns": $pivot_columns,\n             "pivot_values": $pivot_values,\n+            #if $pivot_aggfunc == \'None\':\n+            "pivot_aggfunc": {\n+                #for $i, $s in enumerate($singtabop.user.general.colfun_map)\n+                #echo 16*\' \' + "\'" + str($s.column) + "\'" + \' : [\'\n+                #echo \',\'.join([ "\'%s\'" % $t.aggfunc for $j, $t in enumerate($s.functions) ])\n+                #echo \'],\\n\'\n+                #end for\n+            }\n+            #else\n+            "pivot_aggfunc": $pivot_aggfunc\n+            #end if\n         },\n+\n       #elif str($singtabop.user.general.use) == \'custom\':\n         #set $custom_func = str($singtabop.user.general.fulltable_custom_expr).strip()\n         "fulltable_customop": \'$custom_func\',\n-\n       #end if\n     },\n    '..b'ue="single" />\n+                <param name="input" value="table1.tsv" />\n+                <param name="col_row_names" value="has_col_names" />\n+                <conditional name="user" >\n+                    <param name="mode" value="fulltable" />\n+                    <conditional name="general" >\n+                        <param name="use" value="pivot" />\n+                        <param name="index" value="A" />\n+                        <param name="column" value="C" />\n+                        <param name="values" value="D" />\n+                        <param name="aggfunc" value="max" />\n+                    </conditional>\n+                </conditional>\n+            </conditional>\n+            <output name="table" >\n+                <assert_contents>\n+                    <has_text_matching expression="bar\\t7\\t6" />\n+                </assert_contents>\n+            </output>\n+        </test>\n     </tests>\n     <help><![CDATA[\n Table Compute\n@@ -1777,6 +1901,103 @@\n  g4  chr81   chr6  chr3\n  === =====  ===== =====\n \n+\n+Example 10: Pivot Table with unified Aggregator\n+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n+\n+For an input table of:\n+\n+=== === ===== === ===\n+  A   B     C   D   E\n+=== === ===== === ===\n+foo one small   1   2\n+foo one large   2   4\n+foo one large   2   5\n+foo two small   3   5\n+foo two small   3   6\n+bar one large   4   6\n+bar one small   5   8\n+bar two small   6   9\n+bar two large   7   9\n+=== === ===== === ===\n+\n+we wish to pivot the table with the \'A\' column as the new row index and use the values of the column \'C\' as the new column indexes, based on the aggregated values of \'D\'. By default the aggregator function is the mean, but here we will instead pick the max, to yield:\n+\n+=== == ==\n+C    l  s\n+A\n+=== == ==\n+bar  7  6\n+foo  2  3\n+=== == ==\n+\n+In Galaxy we would select the following:\n+\n+ * *Input Single or Multiple Tables* \xe2\x86\x92 **Single Table**\n+ * *Column names on first row?* \xe2\x86\x92 **Yes**\n+ * *Row names on first column?* \xe2\x86\x92 **Yes**\n+ * *Type of table operation* \xe2\x86\x92  **Perform a Full Table Operation**\n+\n+   * *Operation* \xe2\x86\x92 **Pivot**\n+   * *Index* \xe2\x86\x92 ``A``\n+   * *Column* \xe2\x86\x92 ``C``\n+   * *Values* \xe2\x86\x92 ``D``\n+   * *Aggregator Function* \xe2\x86\x92 ``max``\n+\n+\n+Example 11: Pivot Table with custom Aggregrator\n+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n+\n+For an input table of:\n+\n+\n+====  ==========  ===========  ===  ========\n+Name    Position         City  Age    Random\n+====  ==========  ===========  ===  ========\n+Mary     Manager       Boston   34  0.678577\n+Josh  Programmer     New York   37  0.973168\n+ Jon     Manager      Chicago   29  0.146668\n+Lucy     Manager  Los Angeles   40  0.150120\n+Jane  Programmer      Chicago   29  0.112769\n+ Sue  Programmer       Boston   31  0.185198\n+====  ==========  ===========  ===  ========\n+\n+we wish to pivot the table with the \'Position\' column as the new index and transform the \'Age\' and \'Random\' columns to have mean and standard deviation values\n+\n+==========  =========  ========  ========\n+Position       Age      Random    Random\n+.             mean       mean      std\n+==========  =========  ========  ========\n+Manager     34.333333  0.325122  0.306106\n+Programmer  32.333333  0.423712  0.477219\n+==========  =========  ========  ========\n+\n+In Galaxy we would select the following:\n+\n+ * *Input Single or Multiple Tables* \xe2\x86\x92 **Single Table**\n+ * *Column names on first row?* \xe2\x86\x92 **Yes**\n+ * *Row names on first column?* \xe2\x86\x92 **Yes**\n+ * *Type of table operation* \xe2\x86\x92  **Perform a Full Table Operation**\n+\n+   * *Operation* \xe2\x86\x92 **Pivot**\n+   * *Index* \xe2\x86\x92 ``Position``\n+   * *Column-Function Mapping*\n+\n+     * *Value Column* \xe2\x86\x92 ``Age``\n+\n+       * *Function* \xe2\x86\x92 ``mean``\n+\n+     * *Value Column* \xe2\x86\x92 ``Random``\n+\n+       * *Function* \xe2\x86\x92 ``mean``\n+       * *Function* \xe2\x86\x92 ``std``\n+\n+\n+This splits the matrix using "foo" and "bar" using only the values from "baz". Header values\n+ may contain extra information.\n+\n+\n+\n ]]></help>\n     <citations></citations>\n </tool>\n'
b
diff -r 60ff16842fcd -r 93a3ce78ce55 test-data/pivot.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pivot.tsv Tue Apr 20 15:46:10 2021 +0000
b
@@ -0,0 +1,7 @@
+ Name Position City Age Random
+0 Mary Manager Boston 34 0.678577
+1 Josh Programmer New York 37 0.973168
+2 Jon Manager Chicago 29 0.146668
+3 Lucy Manager Los Angeles 40 0.150120
+4 Jane Programmer Chicago 29 0.112769
+5 Sue Programmer Boston 31 0.185198
\ No newline at end of file
b
diff -r 60ff16842fcd -r 93a3ce78ce55 test-data/table1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/table1.tsv Tue Apr 20 15:46:10 2021 +0000
b
@@ -0,0 +1,10 @@
+A B C D E
+foo one small 1 2
+foo one large 2 4
+foo one large 2 5
+foo two small 3 5
+foo two small 3 6
+bar one large 4 6
+bar one small 5 8
+bar two small 6 9
+bar two large 7 9