Mercurial > repos > bgruening > plotly_parallel_coordinates_plot
comparison paracords_plot.py @ 2:9958188c6195 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/plotly_parallel_coordinates_plot commit b26c6ec671ec63cce1f86c70e928673ed2c82f82"
author | bgruening |
---|---|
date | Mon, 04 Nov 2019 12:20:51 -0500 |
parents | 7b21a9b5922f |
children |
comparison legend: equal, deleted, inserted, replaced
1:7b21a9b5922f | 2:9958188c6195 |
---|---|
1 import sys | 1 import sys |
2 import argparse | 2 import argparse |
3 import plotly | 3 import plotly |
4 import plotly.graph_objs as go | 4 import plotly.graph_objs as go |
5 import pandas as pd | 5 import pandas as pd |
6 import re | |
6 | 7 |
7 def main(infile, col_dimensions, categorized, col_color): | 8 |
9 def main(infile, col_dimensions, categorized, col_color, | |
10 dimension_mode='by_index'): | |
8 """ | 11 """ |
9 Produce an interactive paracords plotting html | 12 Produce an interactive paracords plotting html |
10 Args: | 13 Args: |
11 infile: str, tabular file | 14 infile: str, tabular file |
12 col_dimensions: str, comma separated index numbers. For example: "3,4,5" | 15 col_dimensions: str, comma separated index numbers. For example: "3,4,5" |
13 col_color: str, index number | 16 col_color: str, index number |
17 dimension_mode: str, one ['by_index', 'by_name'] | |
14 """ | 18 """ |
15 df = pd.read_csv(infile, sep='\t', parse_dates=True) | 19 df = pd.read_csv(infile, sep='\t', parse_dates=True) |
16 | 20 |
17 dimensions = [] | 21 dimensions = [] |
18 col_dimensions = [int(x)-1 for x in col_dimensions.split(',')] | 22 |
19 for col in col_dimensions: | 23 if dimension_mode not in ['by_index', 'by_name']: |
20 values = df[df.columns[col]] | 24 raise ValueError("Select dimensions `{}` is not supported!"\ |
25 .format(dimension_mode)) | |
26 if dimension_mode == 'by_index': | |
27 col_dimensions = [int(x)-1 for x in col_dimensions.split(',')] | |
28 col_dimensions = df.columns[col_dimensions] | |
29 else: | |
30 if '*' not in col_dimensions: | |
31 col_dimensions = [header.strip() for header in col_dimensions.split(',')] | |
32 else: | |
33 pattern = col_dimensions.strip() | |
34 col_dimensions = [header for header in df.columns | |
35 if re.search(pattern, header)] | |
36 | |
37 for col_name in col_dimensions: | |
38 values = df[col_name] | |
21 if categorized == 'boolfalse' and all(type(e) is int for e in values ): | 39 if categorized == 'boolfalse' and all(type(e) is int for e in values ): |
22 dimensions.append( | 40 dimensions.append( |
23 dict( values = values, | 41 dict( values = values, |
24 tickformat = ",.2r", | 42 tickformat = ",.2r", |
25 label = df.columns[col]) | 43 label = col_name) |
26 ) | 44 ) |
27 elif categorized == 'boolfalse' and all(type(e) is float for e in values ): | 45 elif categorized == 'boolfalse' and all(type(e) is float for e in values ): |
28 dimensions.append( | 46 dimensions.append( |
29 dict( values = values, | 47 dict( values = values, |
30 tickformat = "g", | 48 tickformat = "g", |
31 label = df.columns[col]) | 49 label = col_name) |
32 ) | 50 ) |
33 else: | 51 else: |
34 unique_values = list(set(values)) | 52 unique_values = list(set(values)) |
35 unique_values.sort() | 53 unique_values.sort() |
54 # cast to str, fix object indexing | |
55 unique_values = [repr(e) for e in unique_values] | |
36 dimensions.append( | 56 dimensions.append( |
37 dict( range = [0, len(unique_values)-1], | 57 dict( range = [0, len(unique_values)-1], |
38 tickvals = list(range(len(unique_values))), | 58 tickvals = list(range(len(unique_values))), |
39 ticktext = [str(e) for e in unique_values], | 59 ticktext = unique_values, |
40 values = list(map(lambda e: unique_values.index(e), values )), | 60 values = list(map(lambda e: unique_values.index(repr(e)), values )), |
41 label = df.columns[col]) | 61 label = col_name) |
42 ) | 62 ) |
43 | 63 |
44 col_color = int(col_color) - 1 | 64 col_color = int(col_color) - 1 |
45 colors = df[df.columns[col_color]] | 65 colors = df[df.columns[col_color]] |
46 if all(type(e) is int for e in colors ): | 66 if all(type(e) is int for e in colors ): |
78 aparser = argparse.ArgumentParser() | 98 aparser = argparse.ArgumentParser() |
79 aparser.add_argument( "-i", "--input", dest="infile", required=True) | 99 aparser.add_argument( "-i", "--input", dest="infile", required=True) |
80 aparser.add_argument( "-d", "--col_dimensions", dest="col_dimensions") | 100 aparser.add_argument( "-d", "--col_dimensions", dest="col_dimensions") |
81 aparser.add_argument( "-t", "--categorized_datatype", dest="categorized") | 101 aparser.add_argument( "-t", "--categorized_datatype", dest="categorized") |
82 aparser.add_argument( "-c", "--col_color", dest="col_color") | 102 aparser.add_argument( "-c", "--col_color", dest="col_color") |
103 aparser.add_argument( "-m", "--dimension_mode", dest="dimension_mode") | |
83 args = aparser.parse_args() | 104 args = aparser.parse_args() |
84 | 105 |
85 main(args.infile, args.col_dimensions, args.categorized, args.col_color) | 106 main(args.infile, args.col_dimensions, args.categorized, args.col_color, |
107 args.dimension_mode) |