# HG changeset patch # User jjohnson # Date 1606949997 0 # Node ID 621144f8dbe94fe36c3aad42dd2558a65ec086a2 "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit 80684939b0bf75abb5cc70a9878054c1f734b651-dirty" diff -r 000000000000 -r 621144f8dbe9 pandas_pivot_table.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pandas_pivot_table.py Wed Dec 02 22:59:57 2020 +0000 @@ -0,0 +1,128 @@ +#!/usr/bin/env python + +import argparse +import json +import re +import pandas as pd +import sys +from json.decoder import JSONDecodeError + + +def __main__(): + p = argparse.ArgumentParser() + p.add_argument( + '-i', '--input', + type=argparse.FileType('r'), + required=True, + help='Tabular input file to pivot' + ) + p.add_argument( + '-o', '--output', + type=argparse.FileType('w'), + required=True, + help='Output file' + ) + p.add_argument( + '-S', '--skiprows', + type=int, + default=0, + help='Input column names' + ) + p.add_argument( + '-H', '--header', + default=None, + help='Input column names' + ) + p.add_argument( + '-P', '--prefix', + default=None, + help='Prefix for input column names' + ) + p.add_argument( + '-I', '--index', + help='index columns' + ) + p.add_argument( + '-C', '--columns', + help='columns values which are returned as columns' + ) + p.add_argument( + '-V', '--values', + help='values' + ) + p.add_argument( + '-F', '--aggfunc', + help='aggregate functions on the values' + ) + p.add_argument( + '-N', '--fill_value', + default=None, + help='fill value for missing values' + ) + args = p.parse_args() + + def getValueType(val): + if val or 0. == val: + try: + return int(val) + except ValueError: + try: + return float(val) + except ValueError: + return val + return None + + def getColumn(name, dfcols): + if name in dfcols: + return name + else: + try: + i = int(name) + return dfcols[i] + except: + print('%s not a column in %s' % (name, dfcols), file=sys.stderr) + exit(1) + + def getColumns(val, dfcols): + fields = [v.strip() for v in val.split(',')] + cols = [] + for name in fields: + cols.append(getColumn(name, dfcols)) + return cols + + def getAggFunc(funcStr, dfcols): + af = funcStr + try: + af = json.loads(funcStr) + except JSONDecodeError as de: + print('"%s" is not a json string: ' % funcStr, de.msg, file=sys.stderr) + exit(1) + if isinstance(af, dict): + aggfunc = {getColumn(k, dfcols) : v for k,v in af.items()} + elif isinstance(af, list): + aggfunc = af + else: + aggfunc = af + return aggfunc + + if args.prefix: + df = pd.read_table(args.input, skiprows=args.skiprows, header=None, prefix=args.prefix) + elif args.header: + df = pd.read_table(args.input, skiprows=args.skiprows, header=args.header) + else: + df = pd.read_table(args.input, skiprows=args.skiprows) + df_columns = df.columns.tolist() + index = getColumns(args.index, df_columns) + columns = getColumns(args.columns, df_columns) + values = getColumns(args.values, df_columns) + fill_value = getValueType(args.fill_value) + aggfunc = getAggFunc(args.aggfunc, values) + pdf = df.pivot_table(index=index, columns=columns, + values=values, aggfunc=aggfunc, + fill_value=fill_value) + pdf_cols = ['_'.join(reversed(p)) if isinstance(p, tuple) else p for p in pdf.columns.tolist()] + pdf.to_csv(args.output, sep='\t', float_format='%0.6f', header=pdf_cols) + + +if __name__ == "__main__": + __main__() diff -r 000000000000 -r 621144f8dbe9 pandas_pivot_table.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pandas_pivot_table.xml Wed Dec 02 22:59:57 2020 +0000 @@ -0,0 +1,212 @@ + + transform tabular data + + 1.1.4 + + + pandas + + + + + + + + + + + + + + ^[A-Za-z]\w*(,[A-Za-z]\w*)*$ + + + + + ^[A-Za-z]\w*$ + + + + + + ^\S+(,\S+)*$ + + + ^\S+(,\S+)*$ + + + ^\S+(,\S+)*$ + + + +
  • A single function applied to each value column: "min"
  • +
  • An array of functions applied to each value column: ["min", "max", "mean", "std"]
  • +
  • A dictionary of value column : functions: {"A" : "sum", "B" : ["min", "max"]}
  • + + ]]>
    + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + doi:10.5281/zenodo.4161697 + +
    diff -r 000000000000 -r 621144f8dbe9 test-data/table1.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/table1.tsv Wed Dec 02 22:59:57 2020 +0000 @@ -0,0 +1,10 @@ +A B C D E +foo one small 1 2 +foo one large 2 4 +foo one large 2 5 +foo two small 3 5 +foo two small 3 6 +bar one large 4 6 +bar one small 5 8 +bar two small 6 9 +bar two large 7 9