diff clustering.xml @ 0:555ca19d07e6 draft default tip

planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author goeckslab
date Wed, 13 Aug 2025 19:32:19 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/clustering.xml	Wed Aug 13 19:32:19 2025 +0000
@@ -0,0 +1,174 @@
+<tool id="clustering_spatialGE" name="spatialGE Spatially-Informed Clustering" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01">
+    <description>Unsupervised spatially-informed clustering on spatialGE spatial transcriptomics data</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="spatialge_requirements"/>
+
+    <command detect_errors="aggressive"><![CDATA[
+
+        ##--------------------------
+        ## CLUSTERING
+        ##--------------------------
+
+        Rscript '$__tool_directory__/spatialGE_clust.R'
+
+        --file '$file'
+
+        ## Platform flag: exactly one is emitted, matched on the select's string value.
+        #if str($datatype) == 'visium'
+            --visium
+        #elif str($datatype) == 'raw'
+            --raw
+        #elif str($datatype) == 'cosmx'
+            --cosmx
+        #end if
+
+        ## Sample subset is forwarded only when the text field is not empty.
+        #if $samples
+            --samples '$samples'
+        #end if
+
+        ## Optional floats are tested against the empty string rather than for
+        ## truthiness, so an explicit weight of 0 is still passed to the script.
+        #if str($spatial_weight) != ''
+            --weight '$spatial_weight'
+        #end if
+
+        --dist '$dist_metric'
+
+        ## Values declared inside the deepsplit conditional are read through its name.
+        #if str($deepsplit.split_type) == 'logical_split'
+            --dslogical
+            --logical '$deepsplit.split_true'
+        #elif str($deepsplit.split_type) == 'numeric_split'
+            --dsnumeric
+            --numeric '$deepsplit.split_numeric'
+        #end if
+
+        ## Same empty-string test as the weight: a point size of 0 is within range.
+        #if str($ptsize) != ''
+            --ptsize '$ptsize'
+        #end if
+
+    ]]></command>
+    <inputs>
+        <param name="file" type="data" format="rds" label="STlist.rds output from spatialGE preprocessing tool"/>
+        <param name="datatype" type="select" label="Original data type from spatialGE Preprocessing">
+            <option value="visium" selected="true">Visium</option>
+            <option value="raw">Raw Data</option>
+            <option value="cosmx">CosMX-SMI</option>
+        </param>
+        <param name="samples" type="text" optional="true" label="Sample(s) to perform clustering (comma separated list)"/>
+        <param name="spatial_weight" type="float" min="0" max="1" value="0.025" optional="true" label="Weight to be applied to spatial distances"/>
+        <param name="dist_metric" type="select" label="Distance metric options">
+            <option value="euclidean" selected="true">Euclidean</option>
+            <option value="manhattan">Manhattan</option>
+            <option value="maximum">Maximum</option>
+            <option value="minkowski">Minkowski</option>
+            <option value="canberra">Canberra</option>
+        </param>
+        <!-- Each deepSplit mode exposes only the value it needs; "no_split" has no extra inputs. -->
+        <conditional name="deepsplit">
+            <param name="split_type" type="select" label="Optional: specify deepSplit parameter for cluster splitting sensitivity">
+                <option value="no_split" selected="true">Do not change cluster sensitivity</option>
+                <option value="logical_split">Logical split: generally perform more sensitive clustering</option>
+                <option value="numeric_split">Numeric split: specify clustering sensitivity between 0-4</option>
+            </param>
+            <when value="no_split"/>
+            <when value="logical_split">
+                <param name="split_true" type="boolean" checked="false" label="Switch to Yes for logical deepSplit"/>
+            </when>
+            <when value="numeric_split">
+                <param name="split_numeric" type="float" min="0" max="4" label="Specify number between 0-4 for deepSplit (higher value equals higher sensitivity)"/>
+            </when>
+        </conditional>
+        <param name="ptsize" type="float" min="0" max="5" value="2.75" optional="true" label="Specify plot point size"/>
+    </inputs>
+    <outputs>
+        <!-- Always produced (no filter). Files discovered in cluster_plots/ become list elements named by basename. -->
+        <collection name="cluster_plots" type="list" label="Cluster Plots">
+            <discover_datasets pattern="__name_and_ext__" directory="./cluster_plots" ext="png" />
+        </collection>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1"> <!-- CosMx input, one sample, deepSplit left at its default ("no_split") -->
+            <param name="file" value="STobj_lung6.rds"/>
+            <param name="datatype" value="cosmx"/>
+            <param name="samples" value="Lung6_fov_4"/>
+            <output_collection name="cluster_plots">
+                <element name="clustered_Lung6_fov_4" file="clustered_Lung6_fov_4.png" compare="sim_size"/>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1"> <!-- CosMx input, one sample, logical deepSplit switched on -->
+            <param name="file" value="STobj_lung5.rds"/>
+            <param name="datatype" value="cosmx"/>
+            <param name="samples" value="Lung5_fov_2"/>
+            <conditional name="deepsplit">
+                <param name="split_type" value="logical_split"/>
+                <param name="split_true" value="TRUE"/>
+            </conditional>
+            <output_collection name="cluster_plots">
+                <element name="clustered_Lung5_fov_2" file="clustered_Lung5_fov_2.png" compare="sim_size"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+    **What it does**
+
+    spatialGE is a tool designed for the analysis and visualization of spatially-resolved transcriptomics data.
+
+    spatialGE Spatially-Informed Clustering is built to perform unsupervised spatially-informed clustering on the spots/cells of spatial transcriptomics data and the expression of the top variable genes.
+    This tool will take the output of `spatialGE Preprocessing` and calculate distances (Euclidean by default) between spots/cells from both their gene expression profiles and their spatial coordinates.
+    The distances are weighted and hierarchical clustering is performed on the sum of the weighted distance matrices. This allows for a better understanding
+    of the tissue architecture by identifying tissue niches/domains that are spatially cohesive, according to spot/cell clustering.
+
+    **Input**
+
+    Currently, the only accepted input is an .rds file as output by `spatialGE Preprocessing`. The preprocessing step creates an `STlist`, spatialGE's required data type.
+    Please first perform preprocessing to transform spatial transcriptomics data, then use the output **STlist.rds** file as input for this tool.
+
+    It is also required to specify the original data type. Please select either Visium, Raw Data, or CosMX-SMI
+    as the data type corresponding to the `spatialGE Preprocessing` input.
+    
+    **Optional Settings**
+
+    - Sample(s) to perform clustering (comma separated list):
+
+        - To perform clustering and plotting on only a subset of samples from the STlist.rds file, input a comma-separated list of unique sample names matching those in the input file
+        - Visium Example: sample_094c,sample_094d
+        - Raw Data Example: ST_mel3_rep1,ST_mel2_rep1
+        - CosMX-SMI Example: Lung5_fov_2,Lung5_fov_11
+
+    - Weight to be applied to spatial distances:
+
+        - Any number between 0-1 indicating the weight to be applied to spatial distances. A weight closer to 0 indicates that little spatial information is used, and clustering relies on gene expression profiles. A weight closer to 1 indicates that spatial information is heavily used, with little reliance on gene expression. The default value here is 0.025, using mostly gene expression with a spatial smoothing effect.
+
+    - Distance metric options:
+
+            List of distance metric options from `wordspace::dist.matrix`. Different metrics compare expression profiles and spatial coordinates differently.
+            
+            - Euclidean (default)
+            - Manhattan
+            - Maximum
+            - Minkowski
+            - Canberra
+    
+    - deepSplit for cluster sensitivity:
+
+            Choose between logical or numeric deepSplit to provide a rough control over sensitivity to cluster splitting. The higher the value (or if TRUE), the more numerous and the smaller the clusters produced.
+
+            - Logical: set deepSplit to TRUE, cluster sensitivity will increase
+
+            - Numeric: control deepSplit between 0-4; values closer to 4 indicate higher cluster resolution
+
+    - Specify plot point size:
+
+        - Increase or decrease size of points when plotting, between 0-5. Defaults to 2.75.
+
+        ]]>
+    </help>
+    <expand macro="citations"/>
+</tool>