view cluster_analyze_embed_muon.xml @ 4:78680b96a6ac draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/muon commit fef10e0d450018d1e6ba4cbbda76c7686edc8aae
author iuc
date Sat, 25 Oct 2025 21:00:52 +0000
parents b82f44c4e8af
children
line wrap: on
line source

<tool id="cluster_analyze_embed_muon" name="muon Cluster, analyze, and embed multimodal data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <!-- Shell command for the job: consists only of the @COPY_MUDATA@ and @CMD@ tokens
         (presumably defined in the imported macros.xml; confirm there what they expand to).
         detect_errors="exit_code" makes Galaxy judge job success by the process exit code. -->
    <command detect_errors="exit_code"><![CDATA[
@COPY_MUDATA@
@CMD@
]]></command>
    <!--
        script_file: Cheetah template that renders the Python script for this tool.
        Exactly one branch is rendered, selected by $method.method:
          tl.louvain, tl.leiden: call mu.tl.louvain / mu.tl.leiden on mdata with the shared
                                 @CMD_params_clustering@ token as arguments.
          tl.mofa:               call mu.tl.mofa; groups_label, use_layer, use_var, use_obs and
                                 smooth_covariate are emitted only when set, and the four svi_*
                                 arguments only when svi_mode is 'yes'.
          tl.umap:               call mu.tl.umap; maxiter and neighbors_key are emitted only when set.
        The mofa and umap calls pass copy=False.
        The @CMD_imports@, @CMD_read_inputs@ and @CMD_mudata_write_outputs@ tokens are presumably
        defined in macros.xml (TODO confirm); they surround the method call.
    -->
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == 'tl.louvain'
mu.tl.louvain(
    mdata,
    @CMD_params_clustering@
)

#else if $method.method == 'tl.leiden'
mu.tl.leiden(
    mdata,
    @CMD_params_clustering@
)

#else if $method.method == 'tl.mofa'
mu.tl.mofa(
    mdata,
    #if $method.groups_label
    groups_label='$method.groups_label',
    #end if
    use_raw=$method.use_raw,
    #if $method.use_layer
    use_layer='$method.use_layer',
    #end if
    #if $method.use_var
    use_var='$method.use_var',
    #end if
    #if str($method.use_obs) != 'None'
    use_obs='$method.use_obs',
    #end if
    n_factors=$method.n_factors,
    scale_views=$method.scale_views,
    scale_groups=$method.scale_groups,
    center_groups=$method.center_groups,
    ard_weights=$method.ard_weights,
    ard_factors=$method.ard_factors,
    spikeslab_weights=$method.spikeslab_weights,
    spikeslab_factors=$method.spikeslab_factors,
    n_iterations=$method.n_iterations,
    convergence_mode='$method.convergence_mode',
    use_float32=$method.use_float32,
    #if $method.svi.svi_mode == 'yes'
    svi_batch_size=$method.svi.svi_batch_size,
    svi_learning_rate=$method.svi.svi_learning_rate,
    svi_forgetting_rate=$method.svi.svi_forgetting_rate,
    svi_start_stochastic=$method.svi.svi_start_stochastic,
    #end if
    #if $method.smooth_covariate
    smooth_covariate='$method.smooth_covariate',
    #end if
    smooth_warping=$method.smooth_warping,
    seed=$method.seed,
    copy=False
)

#else if $method.method == 'tl.umap'
mu.tl.umap(
    mdata,
    min_dist=$method.min_dist,
    spread=$method.spread,
    n_components=$method.n_components,
    #if str($method.maxiter)
    maxiter=$method.maxiter,
    #end if
    alpha=$method.alpha,
    gamma=$method.gamma,
    negative_sample_rate=$method.negative_sample_rate,
    init_pos='$method.init_pos',
    random_state=$method.random_state,
    #if $method.neighbors_key
    neighbors_key='$method.neighbors_key',
    #end if
    copy=False
)
#end if

@CMD_mudata_write_outputs@
]]></configfile>
    </configfiles>
    <!--
        Tool inputs: the MuData input (macro), a "method" conditional that selects which muon.tl
        function is called and exposes that function's parameters, and the common advanced section (macro).
        Parameter names inside each <when> must match the $method.* references in the script_file template.
    -->
    <inputs>
        <expand macro="inputs_mudata"/>
        <conditional name="method">
            <param name="method" type="select" label="Method used for processing">
                <option value="tl.leiden">Cluster: Cluster cells using the Leiden algorithm, using 'muon.tl.leiden'</option>
                <option value="tl.louvain">Cluster: Cluster cells using the Louvain algorithm, using 'muon.tl.louvain'</option>
                <option value="tl.mofa">Analyze: Run Multi Omics Factor Analysis, using 'muon.tl.mofa'</option>
                <option value="tl.umap">Embed: Embed the multimodal neighborhood graph using UMAP, using 'muon.tl.umap'</option>
            </param>
            <!-- Leiden and Louvain expose the same macro-defined parameter set; only the default key_added differs. -->
            <when value="tl.leiden">
                <expand macro="param_resolution"/>
                <expand macro="param_weight"/>
                <expand macro="param_random_state" seed="0"/>
                <expand macro="param_key_added" key_added="leiden"/>
                <expand macro="param_neighbors_key"/>
                <expand macro="param_directed"/>
            </when>
            <when value="tl.louvain">
                <expand macro="param_resolution"/>
                <expand macro="param_weight"/>
                <expand macro="param_random_state" seed="0"/>
                <expand macro="param_key_added" key_added="louvain"/>
                <expand macro="param_neighbors_key"/>
                <expand macro="param_directed"/>
            </when>
            <!-- MOFA: optional text/select parameters left empty are omitted from the generated call. -->
            <when value="tl.mofa">
                <param argument="groups_label" type="text" optional="true" label="a column name in adata.obs for grouping the samples">
                    <expand macro="sanitize_query" />
                </param>
                <expand macro="param_use_raw" label="Use raw slot of AnnData as input values" checked="false"/>
                <param argument="use_layer" type="text" optional="true" label="Use a specific layer of AnnData as input values"
                    help="supersedes use_raw option">
                    <expand macro="sanitize_query" />
                </param>
                <param argument="use_var" type="text" optional="true" label=".var column with a boolean value to select genes"
                    help="e.g. “highly_variable”">
                    <expand macro="sanitize_query" />
                </param>
                <!-- Nothing selected means use_obs is not passed, i.e. muon's default behaviour applies. -->
                <param argument="use_obs" type="select" optional="true" label="strategy to deal with samples (cells) not being the same across modalities"
                    help="If nothing is selected, an error is thrown when the cells are not the same across modalities">
                    <option value="union">union</option>
                    <option value="intersection">intersection</option>
                </param>
                <!-- 'argument' (instead of 'name') keeps the derived parameter name identical and matches the sibling parameters;
                     min="1" rejects zero/negative factor counts before the job is submitted. -->
                <param argument="n_factors" type="integer" min="1" value="10" label="Number of factors to train the model with"/>
                <param argument="scale_views" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Scale views to unit variance"/>
                <param argument="scale_groups" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Scale groups to unit variance"/>
                <param argument="center_groups" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Center groups to zero mean"/>
                <param argument="ard_weights" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use view-wise sparsity"/>
                <param argument="ard_factors" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use group-wise sparsity"/>
                <param argument="spikeslab_weights" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use feature-wise sparsity (e.g. gene-wise)"/>
                <param argument="spikeslab_factors" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use sample-wise sparsity (e.g. cell-wise)"/>
                <param argument="n_iterations" type="integer" min="1" value="1000" label="Upper limit on the number of iterations"/>
                <param argument="convergence_mode" type="select" label="Convergence mode">
                    <option value="fast" selected="true">fast</option>
                    <option value="medium">medium</option>
                    <option value="slow">slow</option>
                </param>
                <param argument="use_float32" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use reduced precision"/>
                <!-- The svi_* parameters are only offered (and only passed to mu.tl.mofa) when svi_mode is 'yes'. -->
                <conditional name="svi">
                    <param name="svi_mode" type="select" label="Use Stochastic Variational Inference (SVI)?">
                        <option value="yes">Yes</option>
                        <option value="no" selected="true">No</option>
                    </param>
                    <when value="yes">
                        <param argument="svi_batch_size" type="float" value="0.5" min="0" max="1" label="Batch size as a fraction"/>
                        <param argument="svi_learning_rate" type="float" value="1.0" min="0" max="1" label="Learning rate"/>
                        <param argument="svi_forgetting_rate" type="float" value="0.5" min="0" max="1" label="Forgetting rate"/>
                        <param argument="svi_start_stochastic" type="integer" value="1" label="First iteration to start SVI"/>
                    </when>
                    <when value="no"/>
                </conditional>
                <param argument="smooth_covariate" type="text" optional="true" label="Use a covariate (column in .obs) to learn smooth factors (MEFISTO)">
                    <expand macro="sanitize_query" />
                </param>
                <param argument="smooth_warping" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Learn the alignment of covariates (e.g. time points) from different groups?"/>
                <param argument="seed" type="integer" value="1" label="Random seed"/>
            </when>
            <!-- UMAP: maxiter is optional and omitted from the generated call when left empty. -->
            <when value="tl.umap">
                <param argument="min_dist" type="float" min="0" value="0.5" label="The effective minimum distance between embedded points"
                    help="Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result in a more even dispersal of points."/>
                <param argument="spread" type="float" value="1.0" label="The effective scale of embedded points"
                    help="Determines how clustered/clumped the embedded points are"/>
                <!-- An embedding needs at least one dimension. -->
                <param argument="n_components" type="integer" min="1" value="2" label="The number of dimensions of the embedding"/>
                <param argument="maxiter" type="integer" optional="true" label="The number of iterations (epochs) of the optimization"
                    help="Called `n_epochs` in the original UMAP"/>
                <param argument="alpha" type="float" value="1.0" label="The initial learning rate for the embedding optimization"/>
                <param argument="gamma" type="float" value="1.0" label="Weighting applied to negative samples in low dimensional embedding optimization"
                    help="Values higher than one will result in greater weight being given to negative samples"/>
                <param argument="negative_sample_rate" type="integer" value="5" label="Negative sample rate"
                    help="The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding"/>
                <param argument="init_pos" type="select" label="How to initialize the low dimensional embedding"
                    help="Called `init` in the original UMAP">
                    <option value="spectral">Use a spectral embedding of the graph</option>
                    <option value="random">Assign initial embedding positions at random</option>
                </param>
                <expand macro="param_random_state" seed="42"/>
                <expand macro="param_neighbors_key"/>
            </when>
        </conditional>
        <expand macro="inputs_common_advanced" />
    </inputs>
    <!-- Output datasets come from the muon_outputs macro; the tests in this file reference
         its outputs by the names mudata_out and hidden_output. -->
    <outputs>
        <expand macro="muon_outputs"/>
    </outputs>
    <tests>
        <test expect_num_outputs="2">
            <!-- tl.leiden with per-modality ('separate') resolutions and weights on pbmc3k_chr21_processed.h5mu -->
            <param name="mdata" location="https://zenodo.org/records/12570984/files/pbmc3k_chr21_processed.h5mu"/>
            <param name="method" value="tl.leiden"/>
            <conditional name="res">
                <param name="type" value="separate"/>
                <repeat name="modalities">
                    <param name="mod_name" value="rna"/>
                    <param name="resolution" value="2.0"/>
                </repeat>
                <repeat name="modalities">
                    <param name="mod_name" value="atac"/>
                    <param name="resolution" value="1.5"/>
                </repeat>
            </conditional>
            <conditional name="weights">
                <param name="type" value="separate"/>
                <repeat name="modalities">
                    <param name="mod_name" value="rna"/>
                    <param name="mod_weights" value="3"/>
                </repeat>
                <repeat name="modalities">
                    <param name="mod_name" value="atac"/>
                    <param name="mod_weights" value="1"/>
                </repeat>
            </conditional>
            <param name="random_state" value="0"/>
            <param name="key_added" value="leiden"/>
            <param name="neighbors_key" value="neighbors"/>
            <param name="directed" value="True"/>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <!-- Literal substring checks on the logged script: the expected strings contain '.',
                 which a regular-expression assertion would treat as "any character". -->
            <output name="hidden_output">
                <assert_contents>
                    <has_text text="mu.tl.leiden"/>
                    <has_text text="'rna': 2.0"/>
                    <has_text text="'atac': 1.5"/>
                    <has_text text="'rna': 3"/>
                    <has_text text="'atac': 1"/>
                    <has_text text="random_state=0"/>
                    <has_text text="key_added='leiden'"/>
                    <has_text text="neighbors_key='neighbors'"/>
                    <has_text text="directed=True"/>
                </assert_contents>
            </output>
            <!-- Dimensions expected in the printed MuData summary (whole object, then the two modalities). -->
            <assert_stdout>
                <has_text_matching expression="179 × 490"/>
                <has_text_matching expression="179 x 178"/>
                <has_text_matching expression="179 x 312"/>
            </assert_stdout>
            <output name="mudata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="mod/rna"/>
                    <has_h5_keys keys="mod/atac"/>
                    <has_h5_keys keys="obs/leiden"/>
                    <has_h5_keys keys="uns/leiden"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.louvain with per-modality ('separate') resolutions and weights on pbmc3k_chr21_processed.h5mu -->
            <param name="mdata" location="https://zenodo.org/records/12570984/files/pbmc3k_chr21_processed.h5mu"/>
            <param name="method" value="tl.louvain"/>
            <conditional name="res">
                <param name="type" value="separate"/>
                <repeat name="modalities">
                    <param name="mod_name" value="rna"/>
                    <param name="resolution" value="2.0"/>
                </repeat>
                <repeat name="modalities">
                    <param name="mod_name" value="atac"/>
                    <param name="resolution" value="1.5"/>
                </repeat>
            </conditional>
            <conditional name="weights">
                <param name="type" value="separate"/>
                <repeat name="modalities">
                    <param name="mod_name" value="rna"/>
                    <param name="mod_weights" value="3"/>
                </repeat>
                <repeat name="modalities">
                    <param name="mod_name" value="atac"/>
                    <param name="mod_weights" value="1"/>
                </repeat>
            </conditional>
            <param name="random_state" value="0"/>
            <param name="key_added" value="louvain"/>
            <param name="neighbors_key" value="neighbors"/>
            <param name="directed" value="True"/>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <!-- Literal substring checks on the logged script: the expected strings contain '.',
                 which a regular-expression assertion would treat as "any character". -->
            <output name="hidden_output">
                <assert_contents>
                    <has_text text="mu.tl.louvain"/>
                    <has_text text="'rna': 2.0"/>
                    <has_text text="'atac': 1.5"/>
                    <has_text text="'rna': 3"/>
                    <has_text text="'atac': 1"/>
                    <has_text text="random_state=0"/>
                    <has_text text="key_added='louvain'"/>
                    <has_text text="neighbors_key='neighbors'"/>
                    <has_text text="directed=True"/>
                </assert_contents>
            </output>
            <!-- Dimensions expected in the printed MuData summary (whole object, then the two modalities). -->
            <assert_stdout>
                <has_text_matching expression="179 × 490"/>
                <has_text_matching expression="179 x 178"/>
                <has_text_matching expression="179 x 312"/>
            </assert_stdout>
            <output name="mudata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="mod/rna"/>
                    <has_h5_keys keys="mod/atac"/>
                    <has_h5_keys keys="obs/louvain"/>
                    <has_h5_keys keys="uns/louvain"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.mofa with SVI enabled and only 10 iterations (keeps the test short) on pbmc3k_chr21_processed.h5mu -->
            <param name="mdata" location="https://zenodo.org/records/12570984/files/pbmc3k_chr21_processed.h5mu"/>
            <param name="method" value="tl.mofa"/>
            <param name="groups_label" value=""/>
            <param name="use_raw" value="False"/>
            <param name="use_var" value="highly_variable"/>
            <param name="use_obs" value="union"/>
            <param name="n_factors" value="10"/>
            <param name="n_iterations" value="10"/>
            <param name="convergence_mode" value="fast"/>
            <conditional name="svi">
                <param name="svi_mode" value="yes"/>
                <param name="svi_batch_size" value="0.5"/>
                <param name="svi_learning_rate" value="1.0"/>
                <param name="svi_forgetting_rate" value="0.5"/>
                <param name="svi_start_stochastic" value="1"/>
            </conditional>
            <param name="seed" value="1"/>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <!-- Literal substring checks on the logged script: the expected strings contain '.',
                 which a regular-expression assertion would treat as "any character". -->
            <output name="hidden_output">
                <assert_contents>
                    <has_text text="mu.tl.mofa"/>
                    <has_text text="use_raw=False"/>
                    <has_text text="use_var='highly_variable'"/>
                    <has_text text="use_obs='union'"/>
                    <has_text text="n_factors=10"/>
                    <has_text text="n_iterations=10"/>
                    <has_text text="convergence_mode='fast'"/>
                    <has_text text="svi_batch_size=0.5"/>
                    <has_text text="svi_learning_rate=1.0"/>
                    <has_text text="svi_forgetting_rate=0.5"/>
                    <has_text text="svi_start_stochastic=1"/>
                    <has_text text="seed=1"/>
                </assert_contents>
            </output>
            <!-- Dimensions expected in the printed MuData summary (whole object, then the two modalities). -->
            <assert_stdout>
                <has_text_matching expression="179 × 490"/>
                <has_text_matching expression="179 x 178"/>
                <has_text_matching expression="179 x 312"/>
            </assert_stdout>
            <output name="mudata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="mod/rna"/>
                    <has_h5_keys keys="mod/atac"/>
                    <has_h5_keys keys="uns/mofa"/>
                    <has_h5_keys keys="obsm/X_mofa"/>
                    <has_h5_keys keys="varm/LFs"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.umap with maxiter limited to 10 on pp.neighbors.h5mu -->
            <param name="mdata" location="https://zenodo.org/records/12570984/files/pp.neighbors.h5mu"/>
            <param name="method" value="tl.umap"/>
            <param name="min_dist" value="0.5"/>
            <param name="spread" value="1.0"/>
            <param name="n_components" value="2"/>
            <param name="maxiter" value="10"/>
            <param name="alpha" value="1.0"/>
            <param name="gamma" value="1.0"/>
            <param name="negative_sample_rate" value="5"/>
            <param name="init_pos" value="spectral"/>
            <param name="random_state" value="42"/>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <!-- Literal substring checks on the logged script: the expected strings contain '.',
                 which a regular-expression assertion would treat as "any character". -->
            <output name="hidden_output">
                <assert_contents>
                    <has_text text="mu.tl.umap"/>
                    <has_text text="min_dist=0.5"/>
                    <has_text text="spread=1.0"/>
                    <has_text text="n_components=2"/>
                    <has_text text="maxiter=10"/>
                    <has_text text="alpha=1.0"/>
                    <has_text text="gamma=1.0"/>
                    <has_text text="negative_sample_rate=5"/>
                    <has_text text="init_pos='spectral'"/>
                    <has_text text="random_state=42"/>
                </assert_contents>
            </output>
            <!-- Dimensions expected in the printed MuData summary (whole object, then the two modalities). -->
            <assert_stdout>
                <has_text_matching expression="2711 × 1781"/>
                <has_text_matching expression="2711 x 555"/>
                <has_text_matching expression="2711 x 1226"/>
            </assert_stdout>
            <output name="mudata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="mod/rna"/>
                    <has_h5_keys keys="mod/atac"/>
                    <has_h5_keys keys="uns/umap"/>
                    <has_h5_keys keys="obsm/X_umap"/>
                    <has_h5_keys keys="obsp/connectivities"/>
                    <has_h5_keys keys="obsp/distances"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Cluster: Cluster cells using the Leiden algorithm ('muon.tl.leiden')
====================================================================

        Cluster cells using the Leiden algorithm. This runs only the multiplex Leiden algorithm on the MuData object
        using connectivities of individual modalities.

        More details on the `muon documentation
        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.leiden.html#muon.tl.leiden>`__

Cluster: Cluster cells using the Louvain algorithm ('muon.tl.louvain')
======================================================================

        Cluster cells using the Louvain algorithm. This runs only the multiplex Louvain algorithm on the MuData object
        using connectivities of individual modalities.

        More details on the `muon documentation
        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.louvain.html#muon.tl.louvain>`__

Analyze: Run Multi Omics Factor Analysis ('muon.tl.mofa')
=========================================================

        Run Multi-Omics Factor Analysis

        More details on the `muon documentation
        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.mofa.html#muon.tl.mofa>`__

Analyze: Similarity Network Fusion ('muon.tl.snf')
==================================================

        Similarity network fusion (SNF). See Wang et al., 2014 (DOI: 10.1038/nmeth.2810).

        More details on the `muon documentation
        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.snf.html#muon.tl.snf>`__

Embed: Embed the multimodal neighborhood graph using UMAP ('muon.tl.umap')
==========================================================================

        Embed the multimodal neighborhood graph using UMAP (McInnes et al, 2018). UMAP (Uniform Manifold Approximation
        and Projection) is a manifold learning technique suitable for visualizing high-dimensional data.

        More details on the `muon documentation
        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.umap.html#muon.tl.umap>`__

    ]]></help>
    <expand macro="citations"/>
</tool>