view rcx_upsetplot.xml @ 0:ec6141f6e5cb draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/upsetr commit 4454c30bfe5d6ed95871729c68938880a14aa9f2
author iuc
date Thu, 15 May 2025 13:34:06 +0000
parents
children
line wrap: on
line source

<tool id="rcx_upsetplot" name="upset plot" version="@TOOL_VERSION@+galaxy0" profile="23.0">
    <description>Upset plot visualization tool using UpSetR</description>
    <!-- @TOOL_VERSION@ is reused in the tool's version attribute and as the pinned r-upsetr package version. -->
    <macros>
        <token name="@TOOL_VERSION@">1.4.0</token>
    </macros>

    <!-- r-upsetr draws the plot; r-arrow provides the Parquet reader called from the R script. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">r-upsetr</requirement>
        <requirement type="package" version="19.0.0">r-arrow</requirement>
    </requirements>
    <!-- NOTE(review): utils.r is not referenced by the command or the configfile in this file; confirm it is still needed. -->
    <required_files>
        <include path="utils.r" />
    </required_files>

    <!-- Tool authors and the maintaining organization. The person identifiers are presumably ORCID iDs (TODO confirm). -->
    <creator>
        <person
            givenName="Kristina"
            familyName="Gomoryova"
            url="https://github.com/KristinaGomoryova"
            identifier="0000-0003-4407-3917" />
        <person
            givenName="Helge"
            familyName="Hecht"
            url="https://github.com/hechth"
            identifier="0000-0001-6744-996X" />
        <organization
            url="https://www.recetox.muni.cz/"
            email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
            name="RECETOX MUNI" />
    </creator>

    <!-- Runs the generated R script; when export_R_script is enabled, the script text is also written to the "script" output. -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '${run_script}'
        #if $export_R_script
        ## Quote both paths so the shell always treats each one as a single word.
        && cat '${run_script}' >> '${script}'
        #end if
    ]]></command>

    <configfiles>
        <configfile name="run_script"><![CDATA[

        ## Read the input table into a base data.frame.
        ##
        ## file_name:      path of the file to read.
        ## file_extension: datatype extension selecting the reader; one of
        ##                 "csv", "tsv", "tabular" or "parquet".
        ##
        ## Column names are taken verbatim from the file (check.names = FALSE).
        ## Any other extension stops the script with an error.
        load_data <- function(file_name, file_extension) {
            if (file_extension == "csv") {
                data_input <- read.csv(file_name, check.names = FALSE)
            } else if (file_extension %in% c("tsv", "tabular")) {
                data_input <- read.delim(file_name, sep = "\t", check.names = FALSE)
            } else if (file_extension == "parquet") {
                ## read_parquet yields a tibble by default; convert it so every
                ## branch hands back the same plain data.frame class.
                data_input <- as.data.frame(arrow::read_parquet(file_name))
            } else {
                stop("Unsupported file format.")
            }
            return(data_input)
        }

        ## Galaxy substitutes the dataset path and its datatype extension here.
        file_name <- "$input_data"
        file_extension <- "$input_data.ext"
        data_input <- load_data(file_name, file_extension)

        ## Binarise the table: every non-zero entry is set to 1.
        data_input[data_input != 0] <- 1

        ## 12 in x 12 in at 600 dpi gives a 7200 x 7200 pixel image.
        png("upsetplot.png", width = 12, height = 12, units = 'in', res = 600)
        p <- UpSetR::upset(
            data_input,
            #if $order_by == "both"
            order.by = c('freq', 'degree'),
            #else
            order.by = "$order_by",
            #end if
            nsets = $nsets,
            ## The field is optional: an empty value is handled like 0 and
            ## rendered as NA, which makes UpSetR plot all intersections.
            #if str($nintersects).strip() in ('', 'None', '0')
            nintersects = NA,
            #else
            nintersects = $nintersects,
            #end if
            group.by = '$group_by',
            #if $cutoff
            cutoff = $cutoff,
            #end if
            empty.intersections = $empty_intersections
        )
        print(p)
        dev.off()
        ]]></configfile>
    </configfiles>

    <!-- User-facing parameters; each one is substituted into the R script template by name. -->
    <inputs>
        <param name="input_data" type="data" format="csv,tsv,tabular,parquet" label="Input Data Table" help="Input file in a tabular/tsv/csv/parquet format containing the data for the UpSet plot."/>
        <param type="select" name="order_by" label="Sort Bars By" help="Choose how to sort the bars in the UpSet plot: by frequency (largest frequency first), degree (largest overlap first), or both.">
            <option value="freq" selected="true">Frequency</option>
            <option value="degree">Degree</option>
            <option value="both">Both</option>
        </param>
        <param name="nsets" type="integer" min="0" max="100" value="5" label="Number of Sets to Include"
               help="Specify the number of sets to include in the plot. The largest sets will be visualized first. Default is 5." />
        <!-- 0 is the sentinel for "plot all intersections"; negative counts are meaningless, hence min="0". -->
        <param label="Number of Intersections to Plot" optional="true" name="nintersects" type="integer" min="0" value="0" help="Specify the number of intersections to display in the plot. If set to 0, all intersections will be plotted."/>
        <!-- The true/false values are the literal strings passed to the group.by argument. -->
        <param name="group_by" type="boolean" checked="false" truevalue="sets" falsevalue="degree" label="Group Bars by Sets" help="Enable this option to group bars for each set based on their set size. Otherwise, bars will be grouped by degree."/>
        <!-- The true/false values are inserted verbatim as R expressions (TRUE or NULL). -->
        <param name="empty_intersections" type="boolean" checked="false" truevalue="TRUE" falsevalue="NULL" label="Include Empty Intersections" help="Enable this option to include empty intersections in the plot, up to the limit of the number of intersections. By default, empty intersections are omitted."/>
        <param name="cutoff" type="integer" min="0" label="Intersection Size Cutoff" optional="true"
               help="Specify a threshold for filtering intersections. Only intersections with a size greater than or equal to this value will be included in the plot. Leave empty to include all intersections." />
        <param name="export_R_script" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Export R Script for Reproducibility" help="Enable this option to export the R script used to generate the plot. This allows you to reproduce the analysis offline. Note that file paths and dependencies must be managed manually."/>
    </inputs>

    <outputs>
        <!-- Collected from upsetplot.png, the file the R script opens as its PNG device. -->
        <data name="upsetplot" format="png" label="Upset plot on ${on_string}" from_work_dir="upsetplot.png"/>        
        <!-- Only created when export_R_script is enabled; the command appends the generated R script to it. -->
        <data name="script" format="txt" label="R script">
            <filter>export_R_script</filter>
        </data>
    </outputs>

    <tests>
        <!-- Default run: export_R_script is off by default, so only the PNG output is expected. -->
        <test expect_num_outputs="1">
            <param name="input_data" value="upsetplot_test_data.txt" ftype="tabular"/>
            <param name="order_by" value="freq"/>
            <!-- <output name="upsetplot" ftype="png" file="upsetplot.png"/> -->
            <!-- The image is checked through its properties rather than against a reference file. -->
            <output name="upsetplot" ftype="png">
                <assert_contents>
                    <has_image_channels channels="1"/>
                    <!-- 7200 px matches the 12 in at 600 dpi requested in the R script. -->
                    <has_image_height height="7200"/>
                    <has_image_width width="7200" />
                    <has_image_center_of_mass center_of_mass="3642.53, 3525.80" eps="0.1"/> 
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
recetox-upsetplot Help
======================

Overview
--------

The `recetox-upsetplot` tool generates **UpSet plots**, a visualization technique for analyzing set intersections. UpSet plots are similar to Venn diagrams but are particularly advantageous when working with more than three sets, where Venn diagrams become overly complex and difficult to interpret.

An UpSet plot consists of three main components:

1. **Set Sizes (Left)**: A bar chart showing the size of each individual set.
2. **Intersections (Center)**: A matrix of black dots indicating which sets are part of each intersection.
3. **Intersection Sizes (Top)**: A bar chart showing the size of each intersection.

For more information, refer to the official `UpSetR documentation <https://github.com/hms-dbmi/UpSetR>`_.

Input
-----

The tool expects a **dataframe** in one of the following formats:

- **CSV**
- **TSV / Tabular**
- **Parquet**

Each column in the input corresponds to a set, and intersections are calculated based on the columns. If the input is not a binary matrix (i.e., values are not 0 or 1), all non-zero values will automatically be converted to 1.

Example Input
~~~~~~~~~~~~~

==== ==== ====
Set1 Set2 Set3
==== ==== ====
1    0    1
0    1    1
1    1    0
==== ==== ====

Example Output
~~~~~~~~~~~~~~

The tool will generate an UpSet plot visualizing the intersections between `Set1`, `Set2`, and `Set3`.

Parameters
----------

The following options allow you to customize the plot:

- **Sort Bars By** (`order_by`): Determines how the intersection bars are sorted. Options:

  - `freq`: Sort by intersection size (default).
  - `degree`: Sort by the number of sets involved in the intersection.
  - `both`: Sort by both intersection size and degree.

- **Empty Intersections** (`empty_intersections`): Whether to include intersections with zero elements in the plot. Default is to hide them.

- **Number of Intersections** (`nintersects`): Limits the number of intersections displayed. Default is `0`, which shows all intersections.

- **Group by Sets** (`group_by`): If enabled, intersections are grouped by the original sets. By default, intersections are grouped by degree, i.e. the number of sets involved.

Output
------

The tool generates the following outputs:

1. **UpSet Plot (PNG)**: A high-resolution PNG image of the UpSet plot.
2. **R Script (Optional)**: The R script used to generate the plot, if the `Export R Script` option is enabled.

Links
-----

- **UpSetR Documentation**: `https://github.com/hms-dbmi/UpSetR <https://github.com/hms-dbmi/UpSetR>`_
- **Galaxy Tool Repository**: `https://github.com/galaxyproject/tools-iuc <https://github.com/galaxyproject/tools-iuc>`_

    ]]></help>
    <citations>
        <!-- type="doi" takes the bare DOI, not a resolver URL. -->
        <citation type="doi">10.1093/bioinformatics/btx364</citation>
    </citations>
</tool>