Mercurial > repos > bgruening > plotly_parallel_coordinates_plot
comparison paracords_plot.py @ 2:9958188c6195 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/plotly_parallel_coordinates_plot commit b26c6ec671ec63cce1f86c70e928673ed2c82f82"
| author | bgruening |
|---|---|
| date | Mon, 04 Nov 2019 12:20:51 -0500 |
| parents | 7b21a9b5922f |
| children |
comparison
equal
deleted
inserted
replaced
| 1:7b21a9b5922f | 2:9958188c6195 |
|---|---|
| 1 import sys | 1 import sys |
| 2 import argparse | 2 import argparse |
| 3 import plotly | 3 import plotly |
| 4 import plotly.graph_objs as go | 4 import plotly.graph_objs as go |
| 5 import pandas as pd | 5 import pandas as pd |
| 6 import re | |
| 6 | 7 |
| 7 def main(infile, col_dimensions, categorized, col_color): | 8 |
| 9 def main(infile, col_dimensions, categorized, col_color, | |
| 10 dimension_mode='by_index'): | |
| 8 """ | 11 """ |
| 9 Produce an interactive paracords plotting html | 12 Produce an interactive paracords plotting html |
| 10 Args: | 13 Args: |
| 11 infile: str, tabular file | 14 infile: str, tabular file |
| 12 col_dimensions: str, comma separated index numbers. For example: "3,4,5" | 15 col_dimensions: str, comma separated index numbers. For example: "3,4,5" |
| 13 col_color: str, index number | 16 col_color: str, index number |
| 17 dimension_mode: str, one of ['by_index', 'by_name'] | |
| 14 """ | 18 """ |
| 15 df = pd.read_csv(infile, sep='\t', parse_dates=True) | 19 df = pd.read_csv(infile, sep='\t', parse_dates=True) |
| 16 | 20 |
| 17 dimensions = [] | 21 dimensions = [] |
| 18 col_dimensions = [int(x)-1 for x in col_dimensions.split(',')] | 22 |
| 19 for col in col_dimensions: | 23 if dimension_mode not in ['by_index', 'by_name']: |
| 20 values = df[df.columns[col]] | 24 raise ValueError("Select dimensions `{}` is not supported!"\ |
| 25 .format(dimension_mode)) | |
| 26 if dimension_mode == 'by_index': | |
| 27 col_dimensions = [int(x)-1 for x in col_dimensions.split(',')] | |
| 28 col_dimensions = df.columns[col_dimensions] | |
| 29 else: | |
| 30 if '*' not in col_dimensions: | |
| 31 col_dimensions = [header.strip() for header in col_dimensions.split(',')] | |
| 32 else: | |
| 33 pattern = col_dimensions.strip() | |
| 34 col_dimensions = [header for header in df.columns | |
| 35 if re.search(pattern, header)] | |
| 36 | |
| 37 for col_name in col_dimensions: | |
| 38 values = df[col_name] | |
| 21 if categorized == 'boolfalse' and all(type(e) is int for e in values ): | 39 if categorized == 'boolfalse' and all(type(e) is int for e in values ): |
| 22 dimensions.append( | 40 dimensions.append( |
| 23 dict( values = values, | 41 dict( values = values, |
| 24 tickformat = ",.2r", | 42 tickformat = ",.2r", |
| 25 label = df.columns[col]) | 43 label = col_name) |
| 26 ) | 44 ) |
| 27 elif categorized == 'boolfalse' and all(type(e) is float for e in values ): | 45 elif categorized == 'boolfalse' and all(type(e) is float for e in values ): |
| 28 dimensions.append( | 46 dimensions.append( |
| 29 dict( values = values, | 47 dict( values = values, |
| 30 tickformat = "g", | 48 tickformat = "g", |
| 31 label = df.columns[col]) | 49 label = col_name) |
| 32 ) | 50 ) |
| 33 else: | 51 else: |
| 34 unique_values = list(set(values)) | 52 unique_values = list(set(values)) |
| 35 unique_values.sort() | 53 unique_values.sort() |
| 54 # cast to str, fix object indexing | |
| 55 unique_values = [repr(e) for e in unique_values] | |
| 36 dimensions.append( | 56 dimensions.append( |
| 37 dict( range = [0, len(unique_values)-1], | 57 dict( range = [0, len(unique_values)-1], |
| 38 tickvals = list(range(len(unique_values))), | 58 tickvals = list(range(len(unique_values))), |
| 39 ticktext = [str(e) for e in unique_values], | 59 ticktext = unique_values, |
| 40 values = list(map(lambda e: unique_values.index(e), values )), | 60 values = list(map(lambda e: unique_values.index(repr(e)), values )), |
| 41 label = df.columns[col]) | 61 label = col_name) |
| 42 ) | 62 ) |
| 43 | 63 |
| 44 col_color = int(col_color) - 1 | 64 col_color = int(col_color) - 1 |
| 45 colors = df[df.columns[col_color]] | 65 colors = df[df.columns[col_color]] |
| 46 if all(type(e) is int for e in colors ): | 66 if all(type(e) is int for e in colors ): |
| 78 aparser = argparse.ArgumentParser() | 98 aparser = argparse.ArgumentParser() |
| 79 aparser.add_argument( "-i", "--input", dest="infile", required=True) | 99 aparser.add_argument( "-i", "--input", dest="infile", required=True) |
| 80 aparser.add_argument( "-d", "--col_dimensions", dest="col_dimensions") | 100 aparser.add_argument( "-d", "--col_dimensions", dest="col_dimensions") |
| 81 aparser.add_argument( "-t", "--categorized_datatype", dest="categorized") | 101 aparser.add_argument( "-t", "--categorized_datatype", dest="categorized") |
| 82 aparser.add_argument( "-c", "--col_color", dest="col_color") | 102 aparser.add_argument( "-c", "--col_color", dest="col_color") |
| 103 aparser.add_argument( "-m", "--dimension_mode", dest="dimension_mode") | |
| 83 args = aparser.parse_args() | 104 args = aparser.parse_args() |
| 84 | 105 |
| 85 main(args.infile, args.col_dimensions, args.categorized, args.col_color) | 106 main(args.infile, args.col_dimensions, args.categorized, args.col_color, |
| 107 args.dimension_mode) |
