view clustering.xml @ 0:555ca19d07e6 draft default tip

planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author goeckslab
date Wed, 13 Aug 2025 19:32:19 +0000
parents
children
line wrap: on
line source

<tool id="clustering_spatialGE" name="spatialGE Spatially-Informed Clustering" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01">
    <description>Unsupervised spatially-informed clustering on spatialGE spatial transcriptomics data</description>
    <!-- Shared definitions are imported from macros.xml. The version tokens used in the
         tool tag and the macros expanded in this file are presumably defined there
         (macros.xml is not visible from this file; confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>

    <!-- Expands the shared "spatialge_requirements" macro, which is defined outside this file. -->
    <expand macro="spatialge_requirements"/>

    <command detect_errors="aggressive"><![CDATA[

        ##--------------------------
        ## CLUSTERING
        ##--------------------------
        ## Assembles the spatialGE_clust.R command line from the form values.

        Rscript '$__tool_directory__/spatialGE_clust.R'

        --file '$file'

        ## Exactly one platform flag is emitted. The select value is converted
        ## with str() so the comparison is a plain string comparison.
        #if str($datatype) == 'visium'
            --visium
        #elif str($datatype) == 'raw'
            --raw
        #elif str($datatype) == 'cosmx'
            --cosmx
        #end if

        #if $samples
            --samples '$samples'
        #end if

        ## Optional floats are compared against the empty string instead of being
        ## tested for truthiness, so an explicit 0 is still passed to the script.
        #if str($spatial_weight) != ''
            --weight '$spatial_weight'
        #end if

        --dist '$dist_metric'

        ## Parameters declared inside the "deepsplit" conditional have to be
        ## addressed through the conditional.
        #if str($deepsplit.split_type) == 'logical_split'
            --dslogical
            --logical '$deepsplit.split_true'
        #elif str($deepsplit.split_type) == 'numeric_split'
            --dsnumeric
            --numeric '$deepsplit.split_numeric'
        #end if

        #if str($ptsize) != ''
            --ptsize '$ptsize'
        #end if

    ]]></command>
    <inputs>
        <!-- STlist object written by the preprocessing tool -->
        <param name="file"
               type="data"
               format="rds"
               label="STlist.rds output from spatialGE preprocessing tool"/>

        <!-- Platform of the data that went into preprocessing; selects the platform flag on the command line -->
        <param name="datatype"
               type="select"
               label="Original data type from spatialGE Preprocessing">
            <option value="visium">Visium</option>
            <option value="raw">Raw Data</option>
            <option value="cosmx">CosMX-SMI</option>
        </param>

        <!-- Optional subset of samples; omitted from the command line when left empty -->
        <param name="samples"
               type="text"
               optional="true"
               label="Sample(s) to perform clustering (comma separated list)"/>

        <!-- Spatial weight in the range 0-1 -->
        <param name="spatial_weight"
               type="float"
               min="0"
               max="1"
               value="0.025"
               optional="true"
               label="Weight to be applied to spatial distances"/>

        <!-- Distance metric passed to the script with the dist option -->
        <param name="dist_metric"
               type="select"
               label="Distance metric options">
            <option value="euclidean" selected="true">Euclidean</option>
            <option value="manhattan">Manhattan</option>
            <option value="maximum">Maximum</option>
            <option value="minkowski">Minkowski</option>
            <option value="canberra">Canberra</option>
        </param>

        <!-- deepSplit control: untouched, logical, or numeric (0-4) -->
        <conditional name="deepsplit">
            <param name="split_type"
                   type="select"
                   label="Optional: specify deepSplit parameter for cluster splitting sensitivity">
                <option value="no_split" selected="true">Do not change cluster sensitivity</option>
                <option value="logical_split">Logical split: generally perform more sensitive clustering</option>
                <option value="numeric_split">Numeric split: specify clustering sensitivity between 0-4</option>
            </param>
            <when value="no_split"/>
            <when value="logical_split">
                <param name="split_true"
                       type="boolean"
                       label="Switch to Yes for logical deepSplit"/>
            </when>
            <when value="numeric_split">
                <param name="split_numeric"
                       type="float"
                       min="0"
                       max="4"
                       label="Specify number between 0-4 for deepSplit (higher value equals higher sensitivity)"/>
            </when>
        </conditional>

        <!-- Point size used when drawing the cluster plots -->
        <param name="ptsize"
               type="float"
               min="0"
               max="5"
               value="2.75"
               optional="true"
               label="Specify plot point size"/>
    </inputs>
    <outputs>
        <!-- Files found in ./cluster_plots after the run are gathered into one list
             collection; each element is named after its file. The collection is always
             produced. The former filter expression referred to the output itself rather
             than to an input parameter, so it could never evaluate and was removed. -->
        <collection name="cluster_plots" type="list" label="Cluster Plots">
            <discover_datasets pattern="__name_and_ext__" directory="./cluster_plots" ext="png" />
        </collection>
    </outputs>
    <tests>
        <!-- CosMX input, single sample, deepSplit left at its default (no_split) -->
        <test expect_num_outputs="1">
            <param name="file" value="STobj_lung6.rds"/>
            <param name="datatype" value="cosmx"/>
            <param name="samples" value="Lung6_fov_4"/>
            <output_collection name="cluster_plots">
                <element name="clustered_Lung6_fov_4"
                         file="clustered_Lung6_fov_4.png"
                         compare="sim_size"/>
            </output_collection>
        </test>

        <!-- CosMX input, single sample, logical deepSplit switched on -->
        <test expect_num_outputs="1">
            <param name="file" value="STobj_lung5.rds"/>
            <param name="datatype" value="cosmx"/>
            <param name="samples" value="Lung5_fov_2"/>
            <conditional name="deepsplit">
                <param name="split_type" value="logical_split"/>
                <param name="split_true" value="TRUE"/>
            </conditional>
            <output_collection name="cluster_plots">
                <element name="clustered_Lung5_fov_2"
                         file="clustered_Lung5_fov_2.png"
                         compare="sim_size"/>
            </output_collection>
        </test>
    </tests>
    <help>
        <![CDATA[
    **What it does**

    spatialGE is a tool designed for the analysis and visualization of spatially-resolved transcriptomics data.

    spatialGE Spatially-Informed Clustering is built to perform unsupervised spatially-informed clustering on the spots/cells of spatial transcriptomics data and the expression of the top variable genes.
    This tool will take the output of `spatialGE Preprocessing` and calculate a distance (defaults to euclidean) between spots/cells based on spatial coordinates.
    The distances are weighted and hierarchical clustering is performed on the sum of the weighted distance matrices. This allows for a better understanding
    of the tissue architecture by identifying tissue niches/domains that are spatially cohesive, according to spot/cell clustering.

    **Input**

    Currently, the only accepted input is an .rds file as output by `spatialGE Preprocessing`. The preprocessing step creates an `STlist`, spatialGE's required data type.
    Please first perform preprocessing to transform spatial transcriptomics data, then use the output **STlist.rds** file as input for this tool.

    It is also required to specify the original data type. Please select either Visium, Raw Data, or CosMX-SMI
    as the data type corresponding to the  `spatialGE Preprocessing` input.
    
    **Optional Settings**

    - Sample(s) to perform clustering (comma separated list):

        - To perform clustering and plotting on only a subset of samples from the STlist.rds file, input a comma-separated list of unique sample names matching those in the input file
        - Visium Example: sample_094c,sample_094d
        - Raw Data Example: ST_mel3_rep1,ST_mel2_rep1
        - CosMX-SMI Example: Lung5_fov_2,Lung5_fov_11

    - Weight to be applied to spatial distances:

        - Any number between 0-1 indicating the weight to be applied to spatial distances. A weight closer to 0 indicates that little spatial information is used, and clustering relies on gene expression profiles. A weight closer to 1 indicates that spatial information is heavily used, with little reliance on gene expression. The default value here is 0.025, using mostly gene expression with a spatial smoothing effect.

    - Distance metric options:

            List of distance metric options from `wordspace::dist.matrix`. Different metrics compare expression profiles and spatial coordinates differently.
            
            - Euclidean (default)
            - Manhattan
            - Maximum
            - Minkowski
            - Canberra
    
    - deepSplit for cluster sensitivity:

            Choose between logical or numeric deepSplit to provide a rough control over sensitivity to cluster splitting. The higher the value (or if TRUE), the more and smaller clusters will be produced.

            - Logical: set deepSplit to TRUE, cluster sensitivity will increase

            - Numeric: control deepSplit between 0-4, closer to 4 indicates higher cluster resolution

    - Specify plot point size:

        - Increase or decrease size of points when plotting, between 0-5. Defaults to 2.75.

        ]]>
    </help>
    <expand macro="citations"/>
</tool>