diff paracords_plot.py @ 2:9958188c6195 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/plotly_parallel_coordinates_plot commit b26c6ec671ec63cce1f86c70e928673ed2c82f82"
author bgruening
date Mon, 04 Nov 2019 12:20:51 -0500
parents 7b21a9b5922f
children
line wrap: on
line diff
--- a/paracords_plot.py	Wed Oct 10 02:29:28 2018 -0400
+++ b/paracords_plot.py	Mon Nov 04 12:20:51 2019 -0500
@@ -3,42 +3,62 @@
 import plotly
 import plotly.graph_objs as go
 import pandas as pd
+import re
 
-def main(infile, col_dimensions, categorized, col_color):
+
+def main(infile, col_dimensions, categorized, col_color,
+         dimension_mode='by_index'):
     """
     Produce an interactive paracords plotting html
     Args:
         infile: str, tabular file
         col_dimensions: str, comma separated index numbers. For example: "3,4,5"
         col_color: str, index number
+        dimension_mode: str, one of ['by_index', 'by_name']
     """
     df = pd.read_csv(infile, sep='\t', parse_dates=True)
 
     dimensions = []
-    col_dimensions = [int(x)-1 for x in col_dimensions.split(',')]
-    for col in col_dimensions:
-        values = df[df.columns[col]]
+
+    if dimension_mode not in ['by_index', 'by_name']:
+        raise ValueError("Select dimensions `{}` is not supported!"\
+                         .format(dimension_mode))
+    if dimension_mode == 'by_index':
+        col_dimensions = [int(x)-1 for x in col_dimensions.split(',')]
+        col_dimensions = df.columns[col_dimensions]
+    else:
+        if '*' not in col_dimensions:
+            col_dimensions = [header.strip() for header in col_dimensions.split(',')]
+        else:
+            pattern = col_dimensions.strip()
+            col_dimensions = [header for header in df.columns
+                              if re.search(pattern, header)]
+
+    for col_name in col_dimensions:
+        values = df[col_name]
         if categorized == 'boolfalse' and all(type(e) is int for e in values ):
             dimensions.append(
                 dict(   values = values,
                         tickformat = ",.2r",
-                        label = df.columns[col])
+                        label = col_name)
             )
         elif categorized == 'boolfalse' and all(type(e) is float for e in values ):
             dimensions.append(
                 dict(   values = values,
                         tickformat = "g",
-                        label = df.columns[col])
+                        label = col_name)
             )
         else:
             unique_values = list(set(values))
             unique_values.sort()
+            # convert to repr() strings so tick labels and index lookups share the same keys
+            unique_values = [repr(e) for e in unique_values]
             dimensions.append(
                 dict(   range = [0, len(unique_values)-1],
                         tickvals = list(range(len(unique_values))),
-                        ticktext = [str(e) for e in unique_values],
-                        values = list(map(lambda e: unique_values.index(e), values )),
-                        label = df.columns[col])
+                        ticktext = unique_values,
+                        values = list(map(lambda e: unique_values.index(repr(e)), values )),
+                        label = col_name)
             )
 
     col_color = int(col_color) - 1
@@ -80,6 +100,8 @@
     aparser.add_argument( "-d", "--col_dimensions", dest="col_dimensions")
     aparser.add_argument( "-t", "--categorized_datatype", dest="categorized")
     aparser.add_argument( "-c", "--col_color", dest="col_color")
+    aparser.add_argument( "-m", "--dimension_mode", dest="dimension_mode")
     args = aparser.parse_args()
 
-    main(args.infile, args.col_dimensions, args.categorized, args.col_color)
\ No newline at end of file
+    main(args.infile, args.col_dimensions, args.categorized, args.col_color,
+         args.dimension_mode)