Mercurial > repos > jjohnson > pandas_pivot_table

--- a/pandas_pivot_table.py	Thu Dec 17 22:23:11 2020 +0000
+++ b/pandas_pivot_table.py	Fri Dec 18 19:35:57 2020 +0000
@@ -76,17 +76,21 @@
                     return val
         return None

-    def getColumn(name, dfcols):
+    def getColumn(name, dfcols, value_cols=None):
+        dfname = None
         if name in dfcols:
-            return name
+            dfname = name
         else:
             try:
-                i = int(name)
-                return dfcols[i]
-            except Exception:
-                print('%s not a column in %s' % (name, dfcols),
-                      file=sys.stderr)
-                exit(1)
+                i = int(name) - 1
+                dfname = dfcols[i]
+            except IndexError:
+                sys.exit('%s not an index into %s' % (name, dfcols))
+            except ValueError:
+                sys.exit('%s not a column in %s' % (name, dfcols))
+        if value_cols and dfname not in value_cols:
+            sys.exit('%s not a value column in %s' % (name, value_cols))
+        return dfname

     def getColumns(val, dfcols):
         fields = [v.strip() for v in val.split(',')]
@@ -95,16 +99,15 @@
             cols.append(getColumn(name, dfcols))
         return cols

-    def getAggFunc(funcStr, dfcols):
+    def getAggFunc(funcStr, dfcols, value_cols):
         af = funcStr
         try:
             af = json.loads(funcStr)
         except JSONDecodeError as de:
-            print('"%s" is not a json string: ' % funcStr, de.msg,
-                  file=sys.stderr)
-            exit(1)
+            sys.exit('"%s" is not a json string: %s' % (funcStr, de.msg))
         if isinstance(af, dict):
-            aggfunc = {getColumn(k, dfcols): v for k, v in af.items()}
+            aggfunc = {getColumn(k, dfcols, value_cols): v
+                       for k, v in af.items()}
         elif isinstance(af, list):
             aggfunc = af
         else:
@@ -127,11 +130,12 @@
     columns = getColumns(args.columns, df_columns)
     values = getColumns(args.values, df_columns)
     fill_value = getValueType(args.fill_value)
-    aggfunc = getAggFunc(args.aggfunc.replace('\'', '"'), values)
+    aggfunc = getAggFunc(args.aggfunc.replace('\'', '"'), df_columns, values)
     pdf = df.pivot_table(index=index, columns=columns,
                          values=values, aggfunc=aggfunc,
                          fill_value=fill_value)
-    pdf_cols = ['_'.join(reversed(p)) if isinstance(p, tuple) else p
+    pdf_cols = ['_'.join([str(x) for x in reversed(p)])
+                if isinstance(p, tuple) else str(p)
                 for p in pdf.columns.tolist()]
     pdf.to_csv(args.output,
                sep='\t',
--- a/pandas_pivot_table.xml	Thu Dec 17 22:23:11 2020 +0000
+++ b/pandas_pivot_table.xml	Fri Dec 18 19:35:57 2020 +0000
@@ -7,6 +7,7 @@
         <token name="@AGGITEM@">'\S+'\s*:\s*@AGGFUNCS@</token>
         <token name="@AGGDICT@">{@AGGITEM@(,\s*@AGGITEM@)*}</token>
         <token name="@AGGF@">(@AGGFUNCS@|@AGGDICT@)</token>
+        <token name="@COL_HELP@">Name of column or 1-based ordinal position of column</token>
     </macros>
     <requirements>
         <requirement type="package" version="@VERSION@">pandas</requirement>
@@ -56,24 +57,28 @@
         </conditional>
 	<param name="skiprows" type="integer" value="0" min="0" label="Skip table rows"/>
 	<param name="pvt_index" type="text" value="" label="Pivot table index columns">
+            <help>@COL_HELP@</help>
             <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
         </param>
 	<param name="pvt_columns" type="text" value="" label="Pivot table columns to split into output columns">
+            <help>@COL_HELP@</help>
             <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
         </param>
 	<param name="pvt_values" type="text" value="" label="Pivot table value  columns">
+            <help>@COL_HELP@</help>
             <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator>
         </param>
 	<param name="aggfunc" type="text" value="" label="Pivot table aggregate function">
             <help><![CDATA[
                 <ul>
                 <li>Available Number Functions: @AGGFUNC@</li>
-                <li>Specify functions as (remember the single quotes):</li>
+                <li>Specify functions as:</li>
                     <ul>
                       <li>  - A single function applied to each <i>value</i> column:  <b>'min'</b></li>
                       <li>  - An array of functions applied to each <i>value</i> column:  <b>['min', 'max', 'mean', 'std']</b></li>
                       <li>  - A dictionary of <i>value column : function(s)</i>: <b>{'A' : 'sum', 'B' : ['min', 'max']}</b></li>
                     </ul>
+                <li><i>(remember the single quotes)</i></li>
                 </ul>
             ]]></help>
             <validator type="regex" message="Do not forget the single quotes">@AGGF@</validator>