Mercurial > repos > jjohnson > pandas_pivot_table
changeset 4:eaf2444a2a50 draft default tip
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit de16c12e9e27d41d7c7624d7574c51b5bb8edff1-dirty"
author | jjohnson |
---|---|
date | Fri, 18 Dec 2020 19:35:57 +0000 |
parents | 4b65133e0722 |
children | |
files | pandas_pivot_table.py pandas_pivot_table.xml |
diffstat | 2 files changed, 25 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/pandas_pivot_table.py Thu Dec 17 22:23:11 2020 +0000 +++ b/pandas_pivot_table.py Fri Dec 18 19:35:57 2020 +0000 @@ -76,17 +76,21 @@ return val return None - def getColumn(name, dfcols): + def getColumn(name, dfcols, value_cols=None): + dfname = None if name in dfcols: - return name + dfname = name else: try: - i = int(name) - return dfcols[i] - except Exception: - print('%s not a column in %s' % (name, dfcols), - file=sys.stderr) - exit(1) + i = int(name) - 1 + dfname = dfcols[i] + except IndexError: + sys.exit('%s not an index into %s' % (name, dfcols)) + except ValueError: + sys.exit('%s not a column in %s' % (name, dfcols)) + if value_cols and dfname not in value_cols: + sys.exit('%s not a value column in %s' % (name, value_cols)) + return dfname def getColumns(val, dfcols): fields = [v.strip() for v in val.split(',')] @@ -95,16 +99,15 @@ cols.append(getColumn(name, dfcols)) return cols - def getAggFunc(funcStr, dfcols): + def getAggFunc(funcStr, dfcols, value_cols): af = funcStr try: af = json.loads(funcStr) except JSONDecodeError as de: - print('"%s" is not a json string: ' % funcStr, de.msg, - file=sys.stderr) - exit(1) + sys.exit('"%s" is not a json string: %s' % (funcStr, de.msg)) if isinstance(af, dict): - aggfunc = {getColumn(k, dfcols): v for k, v in af.items()} + aggfunc = {getColumn(k, dfcols, value_cols): v + for k, v in af.items()} elif isinstance(af, list): aggfunc = af else: @@ -127,11 +130,12 @@ columns = getColumns(args.columns, df_columns) values = getColumns(args.values, df_columns) fill_value = getValueType(args.fill_value) - aggfunc = getAggFunc(args.aggfunc.replace('\'', '"'), values) + aggfunc = getAggFunc(args.aggfunc.replace('\'', '"'), df_columns, values) pdf = df.pivot_table(index=index, columns=columns, values=values, aggfunc=aggfunc, fill_value=fill_value) - pdf_cols = ['_'.join(reversed(p)) if isinstance(p, tuple) else p + pdf_cols = ['_'.join([str(x) for x in reversed(p)]) + if isinstance(p, tuple) else str(p) for p in 
pdf.columns.tolist()] pdf.to_csv(args.output, sep='\t',
--- a/pandas_pivot_table.xml Thu Dec 17 22:23:11 2020 +0000 +++ b/pandas_pivot_table.xml Fri Dec 18 19:35:57 2020 +0000 @@ -7,6 +7,7 @@ <token name="@AGGITEM@">'\S+'\s*:\s*@AGGFUNCS@</token> <token name="@AGGDICT@">{@AGGITEM@(,\s*@AGGITEM@)*}</token> <token name="@AGGF@">(@AGGFUNCS@|@AGGDICT@)</token> + <token name="@COL_HELP@">Name of column or 1-based ordinal position of column</token> </macros> <requirements> <requirement type="package" version="@VERSION@">pandas</requirement> @@ -56,24 +57,28 @@ </conditional> <param name="skiprows" type="integer" value="0" min="0" label="Skip table rows"/> <param name="pvt_index" type="text" value="" label="Pivot table index columns"> + <help>@COL_HELP@</help> <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator> </param> <param name="pvt_columns" type="text" value="" label="Pivot table columns to split into output columns"> + <help>@COL_HELP@</help> <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator> </param> <param name="pvt_values" type="text" value="" label="Pivot table value columns"> + <help>@COL_HELP@</help> <validator type="regex" message="Column names separated by commas">^\S+(,\S+)*$</validator> </param> <param name="aggfunc" type="text" value="" label="Pivot table aggregate function"> <help><![CDATA[ <ul> <li>Available Number Functions: @AGGFUNC@</li> - <li>Specify functions as (remember the single quotes):</li> + <li>Specify functions as:</li> <ul> <li> - A single function applied to each <i>value</i> column: <b>'min'</b></li> <li> - An array of functions applied to each <i>value</i> column: <b>['min', 'max', 'mean', 'std']</b></li> <li> - A dictionary of <i>value column : function(s)</i>: <b>{'A' : 'sum', 'B' : ['min', 'max']}</b></li> </ul> + <li><i>(remember the single quotes)</i></li> </ul> ]]></help> <validator type="regex" message="Do not forget the single quotes">@AGGF@</validator>