changeset 0:4a46d83cb08f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/seaborn commit 24dc6373560bd5e409fca84154634f5a528001c3
author iuc
date Wed, 14 May 2025 08:39:22 +0000
parents
children
files macros.xml seaborn_pairgrid.xml test-data/mtcars.txt
diffstat 3 files changed, 237 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Wed May 14 08:39:22 2025 +0000
@@ -0,0 +1,87 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.13.2</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+
+    <xml name="edam">
+        <edam_topics>
+            <edam_topic>topic_0092</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_0337</edam_operation>
+        </edam_operations>
+    </xml>
+
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">seaborn</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+
+    <xml name="inputs">
+        <param argument="--input_data" type="data" format="tsv,tabular,csv,parquet" label="Input data table" help="Provide the input data file in one of the supported formats: TSV, TXT, CSV, or Parquet. This file will be used to generate the plot." />
+    </xml>
+
+    <xml name="transformation">
+        <param name="transformation" type="select" label="Transformation" help="Choose a transformation function to apply to the numerical data in the input file. This can be useful for scaling or normalizing the data before plotting.">
+            <option value="lambda x: x" selected="true">no transformation</option>
+            <option value="np.log10">log10</option>
+            <option value="np.log2">log2</option>
+        </param>
+    </xml>
+
+    <xml name="columns" tokens="header">
+        <param name="xcol" type="data_column" data_ref="input_data" label="x-axis" optional="true" use_header_names="@HEADER@" help="Select the column from the input data to use for the x-axis of the plot."/>
+        <param name="ycol" type="data_column" data_ref="input_data" label="y-axis" optional="true" use_header_names="@HEADER@" help="Select the column from the input data to use for the y-axis of the plot."/>
+        <section name="advanced_input" title="Advanced">
+            <param name="hue" type="data_column" data_ref="input_data" label="hue" optional="true" use_header_names="@HEADER@" help="Select a column to group data by color (hue) in the plot. This is useful for visualizing categorical data."/>
+            <param name="col" type="data_column" data_ref="input_data" label="column-facetting" optional="true" use_header_names="@HEADER@" help="Select a column to create facets (subplots) along the columns of the plot grid. This is useful for visualizing how data varies across different categories or groups in the selected column."/>
+            <param name="row" type="data_column" data_ref="input_data" label="row-facetting" optional="true" use_header_names="@HEADER@" help="Select a column to create facets (subplots) along the rows of the plot grid. This allows you to compare data across different categories or groups in the selected column."/>
+        </section>
+    </xml>
+
+    <token name="@INIT@">
+import pandas as pd
+import seaborn as sns
+import numpy as np
+import matplotlib.pyplot as plt
+
+file_name = "$input_data"
+file_extension = "$input_data.ext"
+
+transformation = $transformation
+output_format = "png"
+output_file = "${output_file}"
+
+# load and transform data
+if file_extension == "csv":
+    df = pd.read_csv(file_name, index_col=index_col)
+elif file_extension in ["tsv", "tabular"]:
+    df = pd.read_csv(file_name, sep="\t", index_col=index_col)
+elif file_extension == "parquet":
+    df = pd.read_parquet(file_name, index_col=index_col)
+else:
+    raise ValueError(f"Unsupported file format: {file_extension}")
+data = df.apply(lambda x: transformation(x) if np.issubdtype(x.dtype, np.number) else x)
+    </token>
+
+    <xml name="creator">
+        <creator>
+            <person
+                givenName="Helge"
+                familyName="Hecht"
+                url="https://github.com/hechth"
+                identifier="0000-0001-6744-996X" />
+            <organization
+                url="https://www.recetox.muni.cz/"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                name="RECETOX MUNI" />
+        </creator>
+    </xml>
+
+    <xml name="citation">
+        <citations>
+            <citation type="doi">10.21105/joss.03021</citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seaborn_pairgrid.xml	Wed May 14 08:39:22 2025 +0000
@@ -0,0 +1,117 @@
+<tool id="seaborn_pairgrid" name="seaborn pair-wise scatterplot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0" license="MIT">
+    <description>of all-vs-all column combinations</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="edam"/>
+    <expand macro="requirements"/>
+    <expand macro="creator" />
+
+    <command detect_errors="exit_code"><![CDATA[
+        python3 '${run_script}'
+    ]]></command>
+
+    <configfiles>
+        <configfile name="run_script"><![CDATA[
+index_col = $index_col
+
+@INIT@
+
+from scipy.stats import gaussian_kde
+
+def scatter_density(x, y, **kwargs):
+    kwargs.pop('color')
+    # Calculate the point density
+    xy = np.vstack([x, y])
+    z = gaussian_kde(xy)(xy)
+    plt.scatter(x, y, c=z, cmap="jet", **kwargs)
+
+
+g = sns.PairGrid(data)  # pairwise plot grid over the transformed table
+g.map_lower(sns.regplot, scatter_kws=dict(s=4))  # lower triangle: regression plots
+g.map_lower(sns.kdeplot, levels=4, color=".2")  # lower triangle: KDE contours added to the same cells
+g.map_upper(scatter_density, s=6)  # upper triangle: density-coloured scatter
+g.map_diag(sns.histplot)  # diagonal: histogram of each variable
+
+plt.savefig(f"{output_file}", format=output_format, dpi=300)  # write the figure to the output path
+        ]]></configfile>
+    </configfiles>
+    <inputs>
+        <expand macro="inputs"/>
+        <param argument="index_col" type="boolean" truevalue="0" falsevalue="None" checked="false" label="Is the first column the index?" help="Specify whether the first column of the input data should be treated as the index. If selected, the first column will not be used as data for plotting." />
+        <expand macro="transformation"/>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="png" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <!-- Test 1: Generate a pairgrid plot with default settings -->
+        <test>
+            <param name="input_data" value="mtcars.txt" />
+            <output name="output_file">
+                <assert_contents>
+                    <has_image_channels channels="4"/>
+                    <has_image_height height="8250"/>
+                    <has_image_width width="8250" />
+                    <has_image_center_of_mass center_of_mass="4143.08, 4103.67" eps="0.1"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+This tool generates a scatterplot matrix (pair-wise scatterplots) of all column combinations in the input dataset using the Seaborn library. The scatterplot matrix provides a visual summary of the relationships between variables in the dataset, making it useful for exploratory data analysis.
+The tool uses Seaborn's `PairGrid` functionality to create the matrix, with the following features:
+
+- **Lower Triangle**: Regression plots and kernel density estimates (KDE).
+- **Upper Triangle**: Scatterplots with point density coloring.
+- **Diagonal**: Histograms of individual variables.
+
+**Usage**
+
+1. **Input**: Provide a tabular data file in one of the supported formats (TSV, CSV, TXT, or Parquet). Optionally, specify whether the first column should be treated as the index.
+2. **Transformation**: Optionally apply a transformation (e.g., log10 or log2) to the numerical data before plotting.
+3. **Output**: The tool generates a PNG image of the scatterplot matrix, which can be downloaded or used in further analyses.
+
+**Input**
+
+- **Input Data Table**: Upload your data file in TSV, CSV, TXT, or Parquet format. The file should contain numerical data for plotting.
+- **Index Column**: Specify whether the first column of the input data should be treated as the index. If selected, the first column will not be used for plotting.
+- **Data Transformation**: Apply transformations such as log10 or log2 to numerical data before plotting.
+
+**Output**
+
+The tool generates a PNG file containing the scatterplot matrix. The file can be downloaded or used as input for other tools in Galaxy.
+
+**Example Input**
+
+Here is an example of a simple input dataset:
+
++------------+------------+------------+------------+
+| Category   | Value1     | Value2     | Value3     |
++============+============+============+============+
+| A          | 10         | 20         | 30         |
++------------+------------+------------+------------+
+| B          | 15         | 25         | 35         |
++------------+------------+------------+------------+
+| C          | 20         | 30         | 40         |
++------------+------------+------------+------------+
+
+**Example Output**
+
+The tool will generate a scatterplot matrix where:
+
+- The lower triangle contains regression plots and KDE plots.
+- The upper triangle contains scatterplots with point density coloring.
+- The diagonal contains histograms of individual variables.
+
+**Links**
+
+- For more information about Seaborn's `PairGrid`, visit the official documentation: https://seaborn.pydata.org/generated/seaborn.PairGrid.html
+- For detailed parameter descriptions, refer to the Galaxy tool documentation.
+    ]]></help>
+    <expand macro="citation"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mtcars.txt	Wed May 14 08:39:22 2025 +0000
@@ -0,0 +1,33 @@
+brand	mpg	cyl	disp	hp	drat	wt	qsec	vs	am	gear	carb
+Mazda RX4	21	6	160	110	3.9	2.62	16.46	0	1	4	4
+Mazda RX4 Wag	21	6	160	110	3.9	2.875	17.02	0	1	4	4
+Datsun 710	22.8	4	108	93	3.85	2.32	18.61	1	1	4	1
+Hornet 4 Drive	21.4	6	258	110	3.08	3.215	19.44	1	0	3	1
+Hornet Sportabout	18.7	8	360	175	3.15	3.44	17.02	0	0	3	2
+Valiant	18.1	6	225	105	2.76	3.46	20.22	1	0	3	1
+Duster 360	14.3	8	360	245	3.21	3.57	15.84	0	0	3	4
+Merc 240D	24.4	4	146.7	62	3.69	3.19	20	1	0	4	2
+Merc 230	22.8	4	140.8	95	3.92	3.15	22.9	1	0	4	2
+Merc 280	19.2	6	167.6	123	3.92	3.44	18.3	1	0	4	4
+Merc 280C	17.8	6	167.6	123	3.92	3.44	18.9	1	0	4	4
+Merc 450SE	16.4	8	275.8	180	3.07	4.07	17.4	0	0	3	3
+Merc 450SL	17.3	8	275.8	180	3.07	3.73	17.6	0	0	3	3
+Merc 450SLC	15.2	8	275.8	180	3.07	3.78	18	0	0	3	3
+Cadillac Fleetwood	10.4	8	472	205	2.93	5.25	17.98	0	0	3	4
+Lincoln Continental	10.4	8	460	215	3	5.424	17.82	0	0	3	4
+Chrysler Imperial	14.7	8	440	230	3.23	5.345	17.42	0	0	3	4
+Fiat 128	32.4	4	78.7	66	4.08	2.2	19.47	1	1	4	1
+Honda Civic	30.4	4	75.7	52	4.93	1.615	18.52	1	1	4	2
+Toyota Corolla	33.9	4	71.1	65	4.22	1.835	19.9	1	1	4	1
+Toyota Corona	21.5	4	120.1	97	3.7	2.465	20.01	1	0	3	1
+Dodge Challenger	15.5	8	318	150	2.76	3.52	16.87	0	0	3	2
+AMC Javelin	15.2	8	304	150	3.15	3.435	17.3	0	0	3	2
+Camaro Z28	13.3	8	350	245	3.73	3.84	15.41	0	0	3	4
+Pontiac Firebird	19.2	8	400	175	3.08	3.845	17.05	0	0	3	2
+Fiat X1-9	27.3	4	79	66	4.08	1.935	18.9	1	1	4	1
+Porsche 914-2	26	4	120.3	91	4.43	2.14	16.7	0	1	5	2
+Lotus Europa	30.4	4	95.1	113	3.77	1.513	16.9	1	1	5	2
+Ford Pantera L	15.8	8	351	264	4.22	3.17	14.5	0	1	5	4
+Ferrari Dino	19.7	6	145	175	3.62	2.77	15.5	0	1	5	6
+Maserati Bora	15	8	301	335	3.54	3.57	14.6	0	1	5	8
+Volvo 142E	21.4	4	121	109	4.11	2.78	18.6	1	1	4	2