Mercurial > repos > bgruening > plotly_parallel_coordinates_plot
diff paracords_plot.py @ 2:9958188c6195 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/plotly_parallel_coordinates_plot commit b26c6ec671ec63cce1f86c70e928673ed2c82f82"
author | bgruening |
---|---|
date | Mon, 04 Nov 2019 12:20:51 -0500 |
parents | 7b21a9b5922f |
children |
line wrap: on
line diff
--- a/paracords_plot.py Wed Oct 10 02:29:28 2018 -0400 +++ b/paracords_plot.py Mon Nov 04 12:20:51 2019 -0500 @@ -3,42 +3,62 @@ import plotly import plotly.graph_objs as go import pandas as pd +import re -def main(infile, col_dimensions, categorized, col_color): + +def main(infile, col_dimensions, categorized, col_color, + dimension_mode='by_index'): """ Produce an interactive paracords plotting html Args: infile: str, tabular file col_dimensions: str, comma separated index numbers. For example: "3,4,5" col_color: str, index number + dimension_mode: str, one ['by_index', 'by_name'] """ df = pd.read_csv(infile, sep='\t', parse_dates=True) dimensions = [] - col_dimensions = [int(x)-1 for x in col_dimensions.split(',')] - for col in col_dimensions: - values = df[df.columns[col]] + + if dimension_mode not in ['by_index', 'by_name']: + raise ValueError("Select dimensions `{}` is not supported!"\ + .format(dimension_mode)) + if dimension_mode == 'by_index': + col_dimensions = [int(x)-1 for x in col_dimensions.split(',')] + col_dimensions = df.columns[col_dimensions] + else: + if '*' not in col_dimensions: + col_dimensions = [header.strip() for header in col_dimensions.split(',')] + else: + pattern = col_dimensions.strip() + col_dimensions = [header for header in df.columns + if re.search(pattern, header)] + + for col_name in col_dimensions: + values = df[col_name] if categorized == 'boolfalse' and all(type(e) is int for e in values ): dimensions.append( dict( values = values, tickformat = ",.2r", - label = df.columns[col]) + label = col_name) ) elif categorized == 'boolfalse' and all(type(e) is float for e in values ): dimensions.append( dict( values = values, tickformat = "g", - label = df.columns[col]) + label = col_name) ) else: unique_values = list(set(values)) unique_values.sort() + # cast to str, fix object indexing + unique_values = [repr(e) for e in unique_values] dimensions.append( dict( range = [0, len(unique_values)-1], tickvals = list(range(len(unique_values))), - ticktext = [str(e) for e in unique_values], - values = list(map(lambda e: unique_values.index(e), values )), - label = df.columns[col]) + ticktext = unique_values, + values = list(map(lambda e: unique_values.index(repr(e)), values )), + label = col_name) ) col_color = int(col_color) - 1 @@ -80,6 +100,8 @@ aparser.add_argument( "-d", "--col_dimensions", dest="col_dimensions") aparser.add_argument( "-t", "--categorized_datatype", dest="categorized") aparser.add_argument( "-c", "--col_color", dest="col_color") + aparser.add_argument( "-m", "--dimension_mode", dest="dimension_mode") args = aparser.parse_args() - main(args.infile, args.col_dimensions, args.categorized, args.col_color) \ No newline at end of file + main(args.infile, args.col_dimensions, args.categorized, args.col_color, + args.dimension_mode)