view scimap_spatial.xml @ 0:42e6c251bfd0 draft

planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/scimap commit b19cb55dfb751cccc857b95a432890299bfeebb5
author goeckslab
date Tue, 19 Jul 2022 20:30:34 +0000
parents
children d19c068c2490
line wrap: on
line source

<tool id="scimap_spatial" name="Spatial Analysis Tools" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>from Scimap</description>
    <macros>
        <import>main_macros.xml</import>
    </macros>

    <expand macro="scimap_requirements"/>
    <expand macro="macro_stdio" />
    <!-- Print the tool version, using the same @TOOL_VERSION@ token as the version attribute on <tool>. -->
    <version_command>echo "@TOOL_VERSION@"</version_command>
    <!-- Run the Python driver from the tool directory with the parameter config file, the input AnnData and the output path. -->
    <command><![CDATA[
        python '$__tool_directory__/scimap_spatial.py'
            --inputs '$inputs'
            --anndata '$anndata'
            --output '$output'
    ]]></command>
    <!-- Config file holding the chosen parameter values; its path is handed to the script as '$inputs'. -->
    <configfiles>
        <inputs name="inputs"/>
    </configfiles>
    <inputs>
        <!-- AnnData (h5ad) dataset on which the selected analysis is run. -->
        <param name="anndata"
               type="data"
               format="h5ad"
               label="Select the input anndata"/>
        <conditional name="analyses">
            <!-- Chooses which analysis runs; every option value has a matching <when> branch in this conditional. -->
            <param name="selected_tool" type="select" label="Select an analysis">
                <option value="cluster">Cluster</option>
                <option value="spatial_aggregate">Spatial Aggregate -- find regions of aggregation of similar cells</option>
                <option value="spatial_count" selected="true">Spatial Count -- compute a neighbourhood matrix using any categorical variables (e.g. cell-types)</option>
                <option value="spatial_distance">Spatial Distance -- calculate the average shortest distance between phenotypes or clusters of interest</option>
                <option value="spatial_expression">Spatial Expression -- compute a neighbourhood weighted matrix based on the expression values</option>
                <option value="spatial_interaction">Spatial Interaction -- computes how likely celltypes are found next to each other compared to random background</option>
                <option value="spatial_lda">Spatial LDA -- compute a neighbourhood matrix using any categorical variable and then perform Latent Dirichlet Allocation (LDA) modelling</option>
                <option value="spatial_pscore">Spatial pscore -- a scoring system to evaluate user defined proximity between cell types</option>
            </param>
            <!-- Clustering branch: a method selector plus advanced options; per their labels, several options apply to one method only. -->
            <when value="cluster">
                <param name="method" type="select" label="Select the clustering method">
                    <option value="kmeans" selected="true">kmeans</option>
                    <option value="phenograph">phenograph</option>
                    <option value="leiden">leiden</option>
                    <option value="parc">parc</option>
                </param>
                <section name="options" title="Advanced Options" expanded="false">
                    <param argument="subset_genes" type="text" value="" optional="true" label="Type in a list of genes that should be included for the purpose of clustering" help="Optional. Comma delimited. By default the algorithm uses all genes in the dataset." />
                    <!-- Sub-clustering controls: sub_cluster_column and sub_cluster_group are relevant when sub_cluster is enabled. -->
                    <param argument="sub_cluster" type="boolean" checked="false" optional="true" label="Whether to do sub-clustering on existing clustering or phenotyping" />
                    <param argument="sub_cluster_column" type="text" value="phenotype" optional="true" label="Type in the name of a column to be sub-clustered" help="This is only required when sub_cluster is set to True." />
                    <param argument="sub_cluster_group" type="text" value="" optional="true" label="Type in a list of group names within the sub-cluster column" help="Optional. Comma delimited. By default the program will sub-cluster all groups within column passed through the argument sub_cluster_column." />
                    <!-- Method-specific tuning parameters (PARC, K-Means, leiden, phenograph). -->
                    <param argument="parc_small_pop" type="integer" value="50" optional="true" label="Smallest cluster population to be considered a community in PARC clustering" />
                    <param argument="parc_too_big_factor" type="float" value="0.4" optional="true" label="If a cluster exceeds this share of the entire cell population, then the PARC will be run on the large cluster" />
                    <param argument="k" type="integer" value="10" optional="true" label="Number of clusters to return when using K-Means clustering" />
                    <param argument="n_pcs" type="integer" value="" optional="true" label="Number of PCs to be used in leiden clustering" help="By default it uses all PCs" />
                    <param argument="resolution" type="float" value="1" optional="true" label="A parameter value controlling the coarseness of the clustering" help="Higher values lead to more clusters." />
                    <param argument="phenograph_clustering_metric" type="select" label="Distance metric to define nearest neighbors">
                        <option value="euclidean" selected="true">euclidean</option>
                        <option value="cityblock">cityblock</option>
                        <option value="cosine">cosine</option>
                        <option value="manhattan">manhattan</option>
                        <option value="braycurtis">braycurtis</option>
                        <option value="canberra">canberra</option>
                        <option value="chebyshev">chebyshev</option>
                        <option value="correlation">correlation</option>
                        <option value="dice">dice</option>
                        <option value="hamming">hamming</option>
                        <option value="jaccard">jaccard</option>
                        <option value="mahalanobis">mahalanobis</option>
                        <option value="minkowski">minkowski</option>
                        <option value="rogerstanimoto">rogerstanimoto</option>
                        <option value="russellrao">russellrao</option>
                        <option value="seuclidean">seuclidean</option>
                        <option value="sokalmichener">sokalmichener</option>
                        <option value="sokalsneath">sokalsneath</option>
                        <option value="sqeuclidean">sqeuclidean</option>
                        <option value="yule">yule</option>
                    </param>
                    <param argument="nearest_neighbors" type="integer" value="30" optional="true" label="Number of nearest neighbors to use in first step of graph construction" help="This parameter is used both in leiden and phenograph clustering." />
                    <param argument="use_raw" type="boolean" checked="true" optional="true" label="Whether to use raw data for clustering" help="If False, normalized/scaled data within adata.X will be used." />
                    <!-- <param argument="log" type="boolean" checked="true" optional="true" label="Whether to log the raw data" help="Set use_raw = True for this to take effect." /> -->
                    <param argument="random_state" type="integer" value="0" optional="true" label="The seed number for random state" help="Used to change the initialization of the optimization." />
                    <param argument="collapse_labels" type="boolean" checked="false" optional="true" label="Whether to collapse the labels of groups that are not sub-clustered" help="When sub-clustering only a few phenotypes/clusters, this groups all the other phenotypes/clusters into a single category, which helps in visualisation." />
                    <param argument="label" type="text" value="" optional="true" label="Column name for the returned data" help="Stored in adata.obs. The default is adata.obs [method used]." />
                    <!-- <param argument="output_dir"> -->
                </section>
            </when>
            <!-- Spatial Aggregate branch: analysis-specific params are injected into the shared scimap_spatial_options macro. -->
            <when value="spatial_aggregate">
                <expand macro="scimap_spatial_options" label="spatial_aggregate">
                    <param argument="phenotype" type="text" value="phenotype" optional="false" label="Column name of the column containing the phenotype information" />
                    <param argument="purity" type="integer" value="60" min="1" max="100" label="Percent purity of neighbouring cells" help="e.g. if 60 is chosen, every neighbourhood is tested such that if a particular phenotype makes up greater than 60% of the total population it is annotated to be an aggregate of that particular phenotype." />
                    <param argument="method" type="select" label="Select the method">
                        <option value="radius" selected="true">radius</option>
                        <option value="knn">knn</option>
                    </param>
                    <param argument="radius" type="integer" value="30" optional="true" label="The radius used to define a local neighbourhood" />
                    <param argument="knn" type="integer" value="10" optional="true" label="Number of cells considered for defining the local neighbourhood" />
                </expand>
            </when>
            <!-- Spatial Count branch: analysis-specific params are injected into the shared scimap_spatial_options macro. -->
            <when value="spatial_count">
                <expand macro="scimap_spatial_options" label="spatial_count">
                    <param argument="phenotype" type="text" value="phenotype" optional="false" label="Column name of the column containing the phenotype information" />
                    <param argument="method" type="select" label="Select the method">
                        <option value="radius" selected="true">radius</option>
                        <option value="knn">knn</option>
                    </param>
                    <param argument="radius" type="integer" value="30" optional="true" label="The radius used to define a local neighbourhood" />
                    <param argument="knn" type="integer" value="10" optional="true" label="Number of cells considered for defining the local neighbourhood" />
                </expand>
            </when>
            <!-- Spatial Distance branch. The analysis name is passed as label, as every other spatial branch does. -->
            <!-- NOTE(review): the macro body is not visible here; confirm that label sets the default result key. -->
            <when value="spatial_distance">
                <expand macro="scimap_spatial_options" label="spatial_distance">
                    <param argument="phenotype" type="text" value="phenotype" optional="false" label="Column name of the column containing the phenotype information" />
                </expand>
            </when>
            <!-- Spatial Expression branch: the only spatial branch without a phenotype param; it offers use_raw instead. -->
            <when value="spatial_expression">
                <expand macro="scimap_spatial_options" label="spatial_expression">
                    <param argument="method" type="select" label="Select the method">
                        <option value="radius" selected="true">radius</option>
                        <option value="knn">knn</option>
                    </param>
                    <param argument="radius" type="integer" value="30" optional="true" label="The radius used to define a local neighbourhood" />
                    <param argument="knn" type="integer" value="10" optional="true" label="Number of cells considered for defining the local neighbourhood" />
                    <param argument="use_raw" type="boolean" checked="true" optional="true" label="Whether to use raw data" help="If False, normalized/scaled data within adata.X will be used." />
                    <!-- <param argument="log" type="boolean" checked="true" optional="true" label="Whether to log the raw data" help="Set use_raw = True for this to take effect." /> -->
                </expand>
            </when>
            <!-- Spatial Interaction branch: neighbourhood definition plus permutation and P-value settings. -->
            <when value="spatial_interaction">
                <expand macro="scimap_spatial_options" label="spatial_interaction">
                    <param argument="phenotype" type="text" value="phenotype" optional="false" label="Column name of the column containing the phenotype information" />
                    <param argument="method" type="select" label="Select the method">
                        <option value="radius" selected="true">radius</option>
                        <option value="knn">knn</option>
                    </param>
                    <param argument="radius" type="integer" value="30" optional="true" label="The radius used to define a local neighbourhood" />
                    <param argument="knn" type="integer" value="10" optional="true" label="Number of cells considered for defining the local neighbourhood" />
                    <param argument="permutation" type="integer" value="1000" optional="true" label="The number of permutations to be performed for calculating the P-Value" />
                    <param argument="pval_method" type="select" label="Select a method to calculate the P-values">
                        <option value="zscore" selected="true">zscore</option>
                        <option value="histocat">histocat</option>
                    </param>
                </expand>
            </when>
            <!-- Spatial LDA branch: neighbourhood definition plus the number of motifs and the random seed. -->
            <when value="spatial_lda">
                <expand macro="scimap_spatial_options" label="spatial_lda">
                    <param argument="phenotype" type="text" value="phenotype" optional="false" label="Column name of the column containing the phenotype information" />
                    <param argument="method" type="select" label="Select the method">
                        <option value="radius" selected="true">radius</option>
                        <option value="knn">knn</option>
                    </param>
                    <param argument="radius" type="integer" value="30" optional="true" label="The radius used to define a local neighbourhood" />
                    <param argument="knn" type="integer" value="10" optional="true" label="Number of cells considered for defining the local neighbourhood" />
                    <param argument="num_motifs" type="integer" value="10" optional="true" label="The number of requested latent motifs to be extracted from the training corpus" />
                    <param argument="random_state" type="integer" value="0" optional="true" label="The seed number for random state" />
                </expand>
            </when>
            <!-- Spatial pscore branch: proximity is mandatory; radius and knn defaults here (20 and 3) differ from the other branches (30 and 10). -->
            <when value="spatial_pscore">
                <expand macro="scimap_spatial_options" label="spatial_pscore">
                    <param argument="proximity" type="text" value="" optional="false" label="Type in the list of cell-types for which the proximity score needs to be calculated" help="Comma delimited. e.g.: CellType-A,CellType-B." />
                    <param argument="score_by" type="text" value="imageid" optional="true" label="Column name containing regions of interest for score comparison" help="Optional. By default the score is calculated across the entire image." />
                    <param argument="phenotype" type="text" value="phenotype" optional="false" label="Column name of the column containing the phenotype information" />
                    <param argument="method" type="select" label="Select the method">
                        <option value="radius" selected="true">radius</option>
                        <option value="knn">knn</option>
                    </param>
                    <param argument="radius" type="integer" value="20" optional="true" label="The radius used to define a local neighbourhood" />
                    <param argument="knn" type="integer" value="3" optional="true" label="Number of cells considered for defining the local neighbourhood" />
                </expand>
            </when>
        </conditional>
    </inputs>
    <!-- Single h5ad output; its history label embeds the selected analysis name and Galaxy's on_string. -->
    <outputs>
        <data name="output"
              format="h5ad"
              label="Scimap.tools.${analyses.selected_tool} on ${on_string}"/>
    </outputs>
    <!-- Functional tests: each one runs a single analysis and checks that the expected obs key exists in the output h5ad. -->
    <tests>
        <!-- K-Means clustering on imc.h5ad must add obs/kmeans. -->
        <test>
            <param name="anndata" value="imc.h5ad"/>
            <conditional name="analyses">
                <param name="selected_tool" value="cluster"/>
                <param name="method" value="kmeans"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_h5_keys keys="obs/kmeans"/>
                </assert_contents>
            </output>
        </test>
        <!-- Spatial Aggregate with default options on tutorial_data_pheno.h5ad must add obs/spatial_aggregate. -->
        <test>
            <param name="anndata" value="tutorial_data_pheno.h5ad"/>
            <conditional name="analyses">
                <param name="selected_tool" value="spatial_aggregate"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_h5_keys keys="obs/spatial_aggregate"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**

This tool performs various single-cell spatial analyses with Scimap.

**Input**

AnnData.

**Output**

AnnData with a corresponding key added.


        ]]>
    </help>
    <!-- No citations are registered. TODO(review): add the Scimap reference once the preferred citation is confirmed. -->
    <citations/>
</tool>