Mercurial > repos > iuc > seaborn_pairgrid
changeset 0:4a46d83cb08f draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/seaborn commit 24dc6373560bd5e409fca84154634f5a528001c3
author | iuc |
---|---|
date | Wed, 14 May 2025 08:39:22 +0000 |
parents | |
children | |
files | macros.xml seaborn_pairgrid.xml test-data/mtcars.txt |
diffstat | 3 files changed, 237 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed May 14 08:39:22 2025 +0000
@@ -0,0 +1,98 @@
+<macros>
+    <!-- seaborn package version; tools combine it with the suffix below into their version string -->
+    <token name="@TOOL_VERSION@">0.13.2</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+
+    <xml name="edam">
+        <edam_topics>
+            <edam_topic>topic_0092</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_0337</edam_operation>
+        </edam_operations>
+    </xml>
+
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">seaborn</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+
+    <xml name="inputs">
+        <param argument="--input_data" type="data" format="tsv,tabular,csv,parquet" label="Input data table" help="Provide the input data file in one of the supported formats: TSV, TXT, CSV, or Parquet. This file will be used to generate the plot." />
+    </xml>
+
+    <xml name="transformation">
+        <param name="transformation" type="select" label="Transformation" help="Choose a transformation function to apply to the numerical data in the input file. This can be useful for scaling or normalizing the data before plotting.">
+            <option value="lambda x: x" selected="true">no transformation</option>
+            <option value="np.log10">log10</option>
+            <option value="np.log2">log2</option>
+        </param>
+    </xml>
+
+    <!-- Column selectors bound to input_data; the "header" token is passed through to use_header_names -->
+    <xml name="columns" tokens="header">
+        <param name="xcol" type="data_column" data_ref="input_data" label="x-axis" optional="true" use_header_names="@HEADER@" help="Select the column from the input data to use for the x-axis of the plot."/>
+        <param name="ycol" type="data_column" data_ref="input_data" label="y-axis" optional="true" use_header_names="@HEADER@" help="Select the column from the input data to use for the y-axis of the plot."/>
+        <section name="advanced_input" title="Advanced">
+            <param name="hue" type="data_column" data_ref="input_data" label="hue" optional="true" use_header_names="@HEADER@" help="Select a column to group data by color (hue) in the plot. This is useful for visualizing categorical data."/>
+            <param name="col" type="data_column" data_ref="input_data" label="column-facetting" optional="true" use_header_names="@HEADER@" help="Select a column to create facets (subplots) along the columns of the plot grid. This is useful for visualizing how data varies across different categories or groups in the selected column."/>
+            <param name="row" type="data_column" data_ref="input_data" label="row-facetting" optional="true" use_header_names="@HEADER@" help="Select a column to create facets (subplots) along the rows of the plot grid. This allows you to compare data across different categories or groups in the selected column."/>
+        </section>
+    </xml>
+
+    <!--
+        Python preamble shared by the generated plotting scripts.
+        The including script must assign index_col (0 or None) before this token is expanded.
+        Provides: df (table as loaded), data (df with the transformation applied to
+        numeric columns), output_file and output_format.
+    -->
+    <token name="@INIT@">
+import pandas as pd
+import seaborn as sns
+import numpy as np
+import matplotlib.pyplot as plt
+
+file_name = "$input_data"
+file_extension = "$input_data.ext"
+
+transformation = $transformation
+output_format = "png"
+output_file = "${output_file}"
+
+# load and transform data
+if file_extension == "csv":
+    df = pd.read_csv(file_name, index_col=index_col)
+elif file_extension in ["tsv", "tabular"]:
+    df = pd.read_csv(file_name, sep="\t", index_col=index_col)
+elif file_extension == "parquet":
+    # pandas.read_parquet has no index_col argument, so the index is applied after loading
+    df = pd.read_parquet(file_name)
+    if index_col is not None:
+        df = df.set_index(df.columns[index_col])
+else:
+    raise ValueError(f"Unsupported file format: {file_extension}")
+data = df.apply(lambda x: transformation(x) if np.issubdtype(x.dtype, np.number) else x)
+    </token>
+
+    <xml name="creator">
+        <creator>
+            <person
+                givenName="Helge"
+                familyName="Hecht"
+                url="https://github.com/hechth"
+                identifier="0000-0001-6744-996X" />
+            <organization
+                url="https://www.recetox.muni.cz/"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                name="RECETOX MUNI" />
+        </creator>
+    </xml>
+
+    <xml name="citation">
+        <citations>
+            <citation type="doi">10.21105/joss.03021</citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/seaborn_pairgrid.xml Wed May 14 08:39:22 2025 +0000
@@ -0,0 +1,122 @@
+<tool id="seaborn_pairgrid" name="seaborn pair-wise scatterplot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0" license="MIT">
+    <description>of all-vs-all column combinations</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="edam"/>
+    <expand macro="requirements"/>
+    <expand macro="creator" />
+
+    <command detect_errors="exit_code"><![CDATA[
+        python3 '${run_script}'
+    ]]></command>
+
+    <configfiles>
+        <configfile name="run_script"><![CDATA[
+# 0 when the first column is the index, None when it is regular data
+index_col = $index_col
+
+@INIT@
+
+from scipy.stats import gaussian_kde
+
+def scatter_density(x, y, **kwargs):
+    """Scatter plot of x against y with points colored by their estimated density."""
+    # PairGrid supplies a color keyword, which would clash with c=z below
+    kwargs.pop("color", None)
+    # Calculate the point density
+    xy = np.vstack([x, y])
+    z = gaussian_kde(xy)(xy)
+    plt.scatter(x, y, c=z, cmap="jet", **kwargs)
+
+
+# Lower triangle: regression fit plus KDE contours; upper triangle: density scatter; diagonal: histograms
+g = sns.PairGrid(data)
+g.map_lower(sns.regplot, scatter_kws=dict(s=4))
+g.map_lower(sns.kdeplot, levels=4, color=".2")
+g.map_upper(scatter_density, s=6)
+g.map_diag(sns.histplot)
+
+plt.savefig(output_file, format=output_format, dpi=300)
+        ]]></configfile>
+    </configfiles>
+    <inputs>
+        <expand macro="inputs"/>
+        <param argument="index_col" type="boolean" truevalue="0" falsevalue="None" checked="false" label="Is the first column the index?" help="Specify whether the first column of the input data should be treated as the index. If selected, the first column will not be used as data for plotting." />
+        <expand macro="transformation"/>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="png" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <!-- Test 1: Generate a pairgrid plot with default settings -->
+        <test>
+            <param name="input_data" value="mtcars.txt" />
+            <output name="output_file">
+                <assert_contents>
+                    <has_image_channels channels="4"/>
+                    <has_image_height height="8250"/>
+                    <has_image_width width="8250" />
+                    <has_image_center_of_mass center_of_mass="4143.08, 4103.67" eps="0.1"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+This tool generates a scatterplot matrix (pair-wise scatterplots) of all column combinations in the input dataset using the Seaborn library. The scatterplot matrix provides a visual summary of the relationships between variables in the dataset, making it useful for exploratory data analysis.
+The tool uses Seaborn's `PairGrid` functionality to create the matrix, with the following features:
+
+- **Lower Triangle**: Regression plots and kernel density estimates (KDE).
+- **Upper Triangle**: Scatterplots with point density coloring.
+- **Diagonal**: Histograms of individual variables.
+
+**Usage**
+
+1. **Input**: Provide a tabular data file in one of the supported formats (TSV, CSV, TXT, or Parquet). Optionally, specify whether the first column should be treated as the index.
+2. **Advanced Options**: Apply transformations to the data (e.g., log10 or log2) before plotting.
+3. **Output**: The tool generates a PNG image of the scatterplot matrix, which can be downloaded or used in further analyses.
+
+**Input**
+
+- **Input Data Table**: Upload your data file in TSV, CSV, TXT, or Parquet format. The file should contain numerical data for plotting.
+- **Index Column**: Specify whether the first column of the input data should be treated as the index. If selected, the first column will not be used for plotting.
+- **Data Transformation**: Apply transformations such as log10 or log2 to numerical data before plotting.
+
+**Output**
+
+The tool generates a PNG file containing the scatterplot matrix. The file can be downloaded or used as input for other tools in Galaxy.
+
+**Example Input**
+
+Here is an example of a simple input dataset:
+
++------------+------------+------------+------------+
+| Category   | Value1     | Value2     | Value3     |
++============+============+============+============+
+| A          | 10         | 20         | 30         |
++------------+------------+------------+------------+
+| B          | 15         | 25         | 35         |
++------------+------------+------------+------------+
+| C          | 20         | 30         | 40         |
++------------+------------+------------+------------+
+
+**Example Output**
+
+The tool will generate a scatterplot matrix where:
+
+- The lower triangle contains regression plots and KDE plots.
+- The upper triangle contains scatterplots with point density coloring.
+- The diagonal contains histograms of individual variables.
+
+**Links**
+
+- For more information about Seaborn's `PairGrid`, visit the official documentation: https://seaborn.pydata.org/generated/seaborn.PairGrid.html
+- For detailed parameter descriptions, refer to the Galaxy tool documentation.
+    ]]></help>
+    <expand macro="citation"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mtcars.txt Wed May 14 08:39:22 2025 +0000 @@ -0,0 +1,33 @@ +brand mpg cyl disp hp drat wt qsec vs am gear carb +Mazda RX4 21 6 160 110 3.9 2.62 16.46 0 1 4 4 +Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4 +Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1 +Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 +Hornet Sportabout 18.7 8 360 175 3.15 3.44 17.02 0 0 3 2 +Valiant 18.1 6 225 105 2.76 3.46 20.22 1 0 3 1 +Duster 360 14.3 8 360 245 3.21 3.57 15.84 0 0 3 4 +Merc 240D 24.4 4 146.7 62 3.69 3.19 20 1 0 4 2 +Merc 230 22.8 4 140.8 95 3.92 3.15 22.9 1 0 4 2 +Merc 280 19.2 6 167.6 123 3.92 3.44 18.3 1 0 4 4 +Merc 280C 17.8 6 167.6 123 3.92 3.44 18.9 1 0 4 4 +Merc 450SE 16.4 8 275.8 180 3.07 4.07 17.4 0 0 3 3 +Merc 450SL 17.3 8 275.8 180 3.07 3.73 17.6 0 0 3 3 +Merc 450SLC 15.2 8 275.8 180 3.07 3.78 18 0 0 3 3 +Cadillac Fleetwood 10.4 8 472 205 2.93 5.25 17.98 0 0 3 4 +Lincoln Continental 10.4 8 460 215 3 5.424 17.82 0 0 3 4 +Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4 +Fiat 128 32.4 4 78.7 66 4.08 2.2 19.47 1 1 4 1 +Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 +Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.9 1 1 4 1 +Toyota Corona 21.5 4 120.1 97 3.7 2.465 20.01 1 0 3 1 +Dodge Challenger 15.5 8 318 150 2.76 3.52 16.87 0 0 3 2 +AMC Javelin 15.2 8 304 150 3.15 3.435 17.3 0 0 3 2 +Camaro Z28 13.3 8 350 245 3.73 3.84 15.41 0 0 3 4 +Pontiac Firebird 19.2 8 400 175 3.08 3.845 17.05 0 0 3 2 +Fiat X1-9 27.3 4 79 66 4.08 1.935 18.9 1 1 4 1 +Porsche 914-2 26 4 120.3 91 4.43 2.14 16.7 0 1 5 2 +Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2 +Ford Pantera L 15.8 8 351 264 4.22 3.17 14.5 0 1 5 4 +Ferrari Dino 19.7 6 145 175 3.62 2.77 15.5 0 1 5 6 +Maserati Bora 15 8 301 335 3.54 3.57 14.6 0 1 5 8 +Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2