Mercurial > repos > bgruening > plotly_parallel_coordinates_plot
changeset 2:9958188c6195 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/plotly_parallel_coordinates_plot commit b26c6ec671ec63cce1f86c70e928673ed2c82f82"
author | bgruening |
---|---|
date | Mon, 04 Nov 2019 12:20:51 -0500 |
parents | 7b21a9b5922f |
children | |
files | paracords_plot.py paracords_plot.xml |
diffstat | 2 files changed, 67 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/paracords_plot.py Wed Oct 10 02:29:28 2018 -0400 +++ b/paracords_plot.py Mon Nov 04 12:20:51 2019 -0500 @@ -3,42 +3,62 @@ import plotly import plotly.graph_objs as go import pandas as pd +import re -def main(infile, col_dimensions, categorized, col_color): + +def main(infile, col_dimensions, categorized, col_color, + dimension_mode='by_index'): """ Produce an interactive paracords plotting html Args: infile: str, tabular file col_dimensions: str, comma separated index numbers. For example: "3,4,5" col_color: str, index number + dimension_mode: str, one ['by_index', 'by_name'] """ df = pd.read_csv(infile, sep='\t', parse_dates=True) dimensions = [] - col_dimensions = [int(x)-1 for x in col_dimensions.split(',')] - for col in col_dimensions: - values = df[df.columns[col]] + + if dimension_mode not in ['by_index', 'by_name']: + raise ValueError("Select dimensions `{}` is not supported!"\ + .format(dimension_mode)) + if dimension_mode == 'by_index': + col_dimensions = [int(x)-1 for x in col_dimensions.split(',')] + col_dimensions = df.columns[col_dimensions] + else: + if '*' not in col_dimensions: + col_dimensions = [header.strip() for header in col_dimensions.split(',')] + else: + pattern = col_dimensions.strip() + col_dimensions = [header for header in df.columns + if re.search(pattern, header)] + + for col_name in col_dimensions: + values = df[col_name] if categorized == 'boolfalse' and all(type(e) is int for e in values ): dimensions.append( dict( values = values, tickformat = ",.2r", - label = df.columns[col]) + label = col_name) ) elif categorized == 'boolfalse' and all(type(e) is float for e in values ): dimensions.append( dict( values = values, tickformat = "g", - label = df.columns[col]) + label = col_name) ) else: unique_values = list(set(values)) unique_values.sort() + # cast to str, fix object indexing + unique_values = [repr(e) for e in unique_values] dimensions.append( dict( range = [0, len(unique_values)-1], tickvals = list(range(len(unique_values))), - ticktext = [str(e) for e in unique_values], - values = list(map(lambda e: unique_values.index(e), values )), - label = df.columns[col]) + ticktext = unique_values, + values = list(map(lambda e: unique_values.index(repr(e)), values )), + label = col_name) ) col_color = int(col_color) - 1 @@ -80,6 +100,8 @@ aparser.add_argument( "-d", "--col_dimensions", dest="col_dimensions") aparser.add_argument( "-t", "--categorized_datatype", dest="categorized") aparser.add_argument( "-c", "--col_color", dest="col_color") + aparser.add_argument( "-m", "--dimension_mode", dest="dimension_mode") args = aparser.parse_args() - main(args.infile, args.col_dimensions, args.categorized, args.col_color) \ No newline at end of file + main(args.infile, args.col_dimensions, args.categorized, args.col_color, + args.dimension_mode)
--- a/paracords_plot.xml Wed Oct 10 02:29:28 2018 -0400 +++ b/paracords_plot.xml Mon Nov 04 12:20:51 2019 -0500 @@ -1,4 +1,4 @@ -<tool id="plotly_parallel_coordinates_plot" name="Parallel Coordinates Plot" version="0.1"> +<tool id="plotly_parallel_coordinates_plot" name="Parallel Coordinates Plot" version="0.2"> <description>of tabular data</description> <requirements> <requirement type="package" version="3.6">python</requirement> @@ -9,15 +9,27 @@ <command detect_errors="aggressive"><![CDATA[ python '$__tool_directory__/paracords_plot.py' -i '$infile' - -d '$col_dimensions' + -m '$dimension_selections.selected_mode' + -d '$dimension_selections.col_dimensions' -t '$categorized_datatype' -c '$col_color' ]]> </command> <inputs> - <param name="infile" type="data" format="tabular" label="Select data file :"/> - <param name="col_dimensions" multiple="true" type="data_column" data_ref="infile" use_header_names="true" display="checkboxes" label="Select the columns for dimentions:"/> - <param name="categorized_datatype" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="All the dimensions in categorized datatype:"/> + <param name="infile" type="data" format="tabular" label="Select table data file "/> + <conditional name="dimension_selections"> + <param name="selected_mode" type="select" label="The mode of column selection"> + <option value="by_index" selected="true">By index</option> + <option value="by_name">By column name</option> + </param> + <when value="by_index"> + <param name="col_dimensions" multiple="true" type="data_column" data_ref="infile" use_header_names="true" display="checkboxes" label="Select the columns for dimentions:"/> + </when> + <when value="by_name"> + <param name="col_dimensions" type="text" value="^param_.*" label="Type the column names" help="Two modes: 1) multiple names separated by comma, e.g. header1,header2, or 2) include `*` to initiate regular expression searches for every column name, for example, ^param_.* will retreive all the headers starting with param_. Note: no mix of the two is supported. Use default sanitizer: string.ascii_letters + string.digits + " -=_.()/+*^,:?!")"/> + </when> + </conditional> + <param name="categorized_datatype" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Cast all the dimensions into categorical datatype?"/> <param name="col_color" type="data_column" data_ref="infile" use_header_names="true" label="Select a column containg the values for coloring:" help="e.g. mean_test_score"/> </inputs> @@ -39,6 +51,14 @@ <param name="col_color" value="3"/> <output name="output" file="parcoords_plot02.html" compare="sim_size"/> </test> + <test> + <param name="infile" value="parcoords02.tabular" ftype="tabular"/> + <param name="selected_mode" value="by_name"/> + <param name="col_dimensions" value="param_*"/> + <param name="categorized_datatype" value="false"/> + <param name="col_color" value="3"/> + <output name="output" file="parcoords_plot02.html" compare="sim_size"/> + </test> </tests> <help><![CDATA[ **What it does** @@ -51,4 +71,14 @@ ]]> </help> + <citations> + <citation type="bibtex"> + @online{plotly, + author = {Plotly Technologies Inc.}, + title = {Collaborative data science}, + publisher = {Plotly Technologies Inc.}, + address = {Montreal, QC}, year = {2015}, + url = {https://plot.ly} } + </citation> + </citations> </tool>