comparison pandas_pivot_table.py @ 4:eaf2444a2a50 draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/pandas_pivot_table/ commit de16c12e9e27d41d7c7624d7574c51b5bb8edff1-dirty"
author jjohnson
date Fri, 18 Dec 2020 19:35:57 +0000
parents 4b65133e0722
children
comparison
equal deleted inserted replaced
3:4b65133e0722 4:eaf2444a2a50
74 return float(val) 74 return float(val)
75 except ValueError: 75 except ValueError:
76 return val 76 return val
77 return None 77 return None
78 78
79 def getColumn(name, dfcols): 79 def getColumn(name, dfcols, value_cols=None):
80 dfname = None
80 if name in dfcols: 81 if name in dfcols:
81 return name 82 dfname = name
82 else: 83 else:
83 try: 84 try:
84 i = int(name) 85 i = int(name) - 1
85 return dfcols[i] 86 dfname = dfcols[i]
86 except Exception: 87 except IndexError:
87 print('%s not a column in %s' % (name, dfcols), 88 sys.exit('%s not an index into %s' % (name, dfcols))
88 file=sys.stderr) 89 except ValueError:
89 exit(1) 90 sys.exit('%s not a column in %s' % (name, dfcols))
91 if value_cols and dfname not in value_cols:
92 sys.exit('%s not a value column in %s' % (name, value_cols))
93 return dfname
90 94
91 def getColumns(val, dfcols): 95 def getColumns(val, dfcols):
92 fields = [v.strip() for v in val.split(',')] 96 fields = [v.strip() for v in val.split(',')]
93 cols = [] 97 cols = []
94 for name in fields: 98 for name in fields:
95 cols.append(getColumn(name, dfcols)) 99 cols.append(getColumn(name, dfcols))
96 return cols 100 return cols
97 101
98 def getAggFunc(funcStr, dfcols): 102 def getAggFunc(funcStr, dfcols, value_cols):
99 af = funcStr 103 af = funcStr
100 try: 104 try:
101 af = json.loads(funcStr) 105 af = json.loads(funcStr)
102 except JSONDecodeError as de: 106 except JSONDecodeError as de:
103 print('"%s" is not a json string: ' % funcStr, de.msg, 107 sys.exit('"%s" is not a json string: %s' % (funcStr, de.msg))
104 file=sys.stderr)
105 exit(1)
106 if isinstance(af, dict): 108 if isinstance(af, dict):
107 aggfunc = {getColumn(k, dfcols): v for k, v in af.items()} 109 aggfunc = {getColumn(k, dfcols, value_cols): v
110 for k, v in af.items()}
108 elif isinstance(af, list): 111 elif isinstance(af, list):
109 aggfunc = af 112 aggfunc = af
110 else: 113 else:
111 aggfunc = af 114 aggfunc = af
112 return aggfunc 115 return aggfunc
125 df_columns = df.columns.tolist() 128 df_columns = df.columns.tolist()
126 index = getColumns(args.index, df_columns) 129 index = getColumns(args.index, df_columns)
127 columns = getColumns(args.columns, df_columns) 130 columns = getColumns(args.columns, df_columns)
128 values = getColumns(args.values, df_columns) 131 values = getColumns(args.values, df_columns)
129 fill_value = getValueType(args.fill_value) 132 fill_value = getValueType(args.fill_value)
130 aggfunc = getAggFunc(args.aggfunc.replace('\'', '"'), values) 133 aggfunc = getAggFunc(args.aggfunc.replace('\'', '"'), df_columns, values)
131 pdf = df.pivot_table(index=index, columns=columns, 134 pdf = df.pivot_table(index=index, columns=columns,
132 values=values, aggfunc=aggfunc, 135 values=values, aggfunc=aggfunc,
133 fill_value=fill_value) 136 fill_value=fill_value)
134 pdf_cols = ['_'.join(reversed(p)) if isinstance(p, tuple) else p 137 pdf_cols = ['_'.join([str(x) for x in reversed(p)])
138 if isinstance(p, tuple) else str(p)
135 for p in pdf.columns.tolist()] 139 for p in pdf.columns.tolist()]
136 pdf.to_csv(args.output, 140 pdf.to_csv(args.output,
137 sep='\t', 141 sep='\t',
138 float_format=args.float_format, 142 float_format=args.float_format,
139 header=pdf_cols) 143 header=pdf_cols)