Mercurial > repos > iuc > cluster_analyze_embed_muon

diff cluster_analyze_embed_muon.xml @ 0:b82f44c4e8af draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/muon/ commit bcf2ec32c3d13b29da55e0e638da7ddd7162c436
author: iuc
date: Wed, 05 Feb 2025 10:53:32 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster_analyze_embed_muon.xml	Wed Feb 05 10:53:32 2025 +0000
@@ -0,0 +1,465 @@
+<tool id="cluster_analyze_embed_muon" name="muon Cluster, analyze, and embed multimodal data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+@COPY_MUDATA@
+@CMD@
+]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+@CMD_imports@
+@CMD_read_inputs@
+
+#if $method.method == 'tl.louvain'
+mu.tl.louvain(
+    mdata,
+    @CMD_params_clustering@
+)
+
+#else if $method.method == 'tl.leiden'
+mu.tl.leiden(
+    mdata,
+    @CMD_params_clustering@
+)
+
+#else if $method.method == 'tl.mofa'
+mu.tl.mofa(
+    mdata,
+    #if $method.groups_label
+    groups_label='$method.groups_label',
+    #end if
+    use_raw=$method.use_raw,
+    #if $method.use_layer
+    use_layer='$method.use_layer',
+    #end if
+    #if $method.use_var
+    use_var='$method.use_var',
+    #end if
+    #if str($method.use_obs) != 'None'
+    use_obs='$method.use_obs',
+    #end if
+    n_factors=$method.n_factors,
+    scale_views=$method.scale_views,
+    scale_groups=$method.scale_groups,
+    center_groups=$method.center_groups,
+    ard_weights=$method.ard_weights,
+    ard_factors=$method.ard_factors,
+    spikeslab_weights=$method.spikeslab_weights,
+    spikeslab_factors=$method.spikeslab_factors,
+    n_iterations=$method.n_iterations,
+    convergence_mode='$method.convergence_mode',
+    use_float32=$method.use_float32,
+    #if $method.svi.svi_mode == 'yes'
+    svi_batch_size=$method.svi.svi_batch_size,
+    svi_learning_rate=$method.svi.svi_learning_rate,
+    svi_forgetting_rate=$method.svi.svi_forgetting_rate,
+    svi_start_stochastic=$method.svi.svi_start_stochastic,
+    #end if
+    #if $method.smooth_covariate
+    smooth_covariate='$method.smooth_covariate',
+    #end if
+    smooth_warping=$method.smooth_warping,
+    seed=$method.seed,
+    copy=False
+)
+
+#else if $method.method == 'tl.umap'
+mu.tl.umap(
+    mdata,
+    min_dist=$method.min_dist,
+    spread=$method.spread,
+    n_components=$method.n_components,
+    #if str($method.maxiter)
+    maxiter=$method.maxiter,
+    #end if
+    alpha=$method.alpha,
+    gamma=$method.gamma,
+    negative_sample_rate=$method.negative_sample_rate,
+    init_pos='$method.init_pos',
+    random_state=$method.random_state,
+    #if $method.neighbors_key
+    neighbors_key='$method.neighbors_key',
+    #end if
+    copy=False
+)
+#end if
+
+@CMD_mudata_write_outputs@
+]]></configfile>
+    </configfiles>
+    <inputs>
+        <expand macro="inputs_mudata"/>
+        <conditional name="method">
+            <param name="method" type="select" label="Method used for processing">
+                <option value="tl.leiden">Cluster: Cluster cells using the Leiden algorithm, using 'muon.tl.leiden'</option>
+                <option value="tl.louvain">Cluster: Cluster cells using the Louvain algorithm, using 'muon.tl.louvain'</option>
+                <option value="tl.mofa">Analyze: Run Multi Omics Factor Analysis, using 'muon.tl.mofa'</option>
+                <option value="tl.umap">Embed: Embed the multimodal neighborhood graph using UMAP, using 'muon.tl.umap'</option>
+            </param>
+            <when value="tl.leiden">
+                <expand macro="param_resolution"/>
+                <expand macro="param_weight"/>
+                <expand macro="param_random_state" seed="0"/>
+                <expand macro="param_key_added" key_added="leiden"/>
+                <expand macro="param_neighbors_key"/>
+                <expand macro="param_directed"/>
+            </when>
+            <when value="tl.louvain">
+                <expand macro="param_resolution"/>
+                <expand macro="param_weight"/>
+                <expand macro="param_random_state" seed="0"/>
+                <expand macro="param_key_added" key_added="louvain"/>
+                <expand macro="param_neighbors_key"/>
+                <expand macro="param_directed"/>
+            </when>
+            <when value="tl.mofa">
+                <param argument="groups_label" type="text" optional="true" label="a column name in adata.obs for grouping the samples">
+                    <expand macro="sanitize_query" />
+                </param>
+                <expand macro="param_use_raw" label="Use raw slot of AnnData as input values" checked="false"/>
+                <param argument="use_layer" type="text" optional="true" label="Use a specific layer of AnnData as input values"
+                    help="supersedes use_raw option">
+                    <expand macro="sanitize_query" />
+                </param>
+                <param argument="use_var" type="text" optional="true" label=".var column with a boolean value to select genes"
+                    help="e.g. “highly_variable”">
+                    <expand macro="sanitize_query" />
+                </param>
+                <param argument="use_obs" type="select" optional="true" label="strategy to deal with samples (cells) not being the same across modalities"
+                    help="Throws error if there are cells that are not same across modalities">
+                    <option value="union">union</option>
+                    <option value="intersection">intersection</option>
+                </param>
+                <param name="n_factors" type="integer" value="10" label="Number of factors to train the model with"/>
+                <param argument="scale_views" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Scale views to unit variance"/>
+                <param argument="scale_groups" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Scale groups to unit variance"/>
+                <param argument="center_groups" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Center groups to zero mean"/>
+                <param argument="ard_weights" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use view-wise sparsity"/>
+                <param argument="ard_factors" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use group-wise sparsity"/>
+                <param argument="spikeslab_weights" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use feature-wise sparsity (e.g. gene-wise)"/>
+                <param argument="spikeslab_factors" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use sample-wise sparsity (e.g. cell-wise)"/>
+                <param name="n_iterations" type="integer" value="1000" label="Upper limit on the number of iterations"/>
+                <param argument="convergence_mode" type="select" label="Convergence mode">
+                    <option value="fast" selected="true">fast</option>
+                    <option value="medium">medium</option>
+                    <option value="slow">slow</option>
+                </param>
+                <param argument="use_float32" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use reduced precision"/>
+                <conditional name="svi">
+                    <param name="svi_mode" type="select" label="Use Stochastic Variational Inference (SVI)?">
+                        <option value="yes">Yes</option>
+                        <option value="no" selected="true">No</option>
+                    </param>
+                    <when value="yes">
+                        <param argument="svi_batch_size" type="float" value="0.5" min="0" max="1" label="Batch size as a fraction"/>
+                        <param argument="svi_learning_rate" type="float" value="1.0" min="0" max="1" label="Learning rate"/>
+                        <param argument="svi_forgetting_rate" type="float" value="0.5" min="0" max="1" label="Forgetting rate"/>
+                        <param argument="svi_start_stochastic" type="integer" value="1" label="First iteration to start SVI"/>
+                    </when>
+                    <when value="no"/>
+                </conditional>
+                <param argument="smooth_covariate" type="text" optional="true" label="Use a covariate (column in .obs) to learn smooth factors (MEFISTO)">
+                    <expand macro="sanitize_query" />
+                </param>
+                <param argument="smooth_warping" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Learn the alignment of covariates (e.g. time points) from different groups?"/>
+                <param argument="seed" type="integer" value="1" label="Random seed"/>
+            </when>
+            <when value="tl.umap">
+                <param argument="min_dist" type="float" min="0" value="0.5" label="The effective minimum distance between embedded points"
+                    help="Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points."/>
+                <param argument="spread" type="float" value="1.0" label="The effective scale of embedded points"
+                    help="Determines how clustered/clumped the embedded points are"/>
+                <param argument="n_components" type="integer" value="2" label="The number of dimensions of the embedding"/>
+                <param argument="maxiter" type="integer" optional="true" label="The number of iterations (epochs) of the optimization"
+                    help="Called `n_epochs` in the original UMAP"/>
+                <param argument="alpha" type="float" value="1.0" label="The initial learning rate for the embedding optimization"/>
+                <param argument="gamma" type="float" value="1.0" label="Weighting applied to negative samples in low dimensional embedding optimization"
+                    help="Values higher than one will result in greater weight being given to negative samples"/>
+                <param argument="negative_sample_rate" type="integer" value="5" label="Negative sample rate"
+                    help="The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding"/>
+                <param argument="init_pos" type="select" label="How to initialize the low dimensional embedding"
+                    help="Called `init` in the original UMAP">
+                    <option value="spectral">Use a spectral embedding of the graph</option>
+                    <option value="random">Assign initial embedding positions at random</option>
+                </param>
+                <expand macro="param_random_state" seed="42"/>
+                <expand macro="param_neighbors_key"/>
+            </when>
+        </conditional>
+        <expand macro="inputs_common_advanced" />
+    </inputs>
+    <outputs>
+        <expand macro="muon_outputs"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <!-- test2: tl.leiden -->
+            <param name="mdata" location="https://zenodo.org/records/12570984/files/pbmc3k_chr21_processed.h5mu"/>
+            <param name="method" value="tl.leiden"/>
+            <conditional name="res">
+                <param name="type" value="separate"/>
+                <repeat name="modalities">
+                    <param name="mod_name" value="rna"/>
+                    <param name="resolution" value="2.0"/>
+                </repeat>
+                <repeat name="modalities">
+                    <param name="mod_name" value="atac"/>
+                    <param name="resolution" value="1.5"/>
+                </repeat>
+            </conditional>
+            <conditional name="weights">
+                <param name="type" value="separate"/>
+                <repeat name="modalities">
+                    <param name="mod_name" value="rna"/>
+                    <param name="mod_weights" value="3"/>
+                </repeat>
+                <repeat name="modalities">
+                    <param name="mod_name" value="atac"/>
+                    <param name="mod_weights" value="1"/>
+                </repeat>
+            </conditional>
+            <param name="random_state" value="0"/>
+            <param name="key_added" value="leiden"/>
+            <param name="neighbors_key" value="neighbors"/>
+            <param name="directed" value="True"/>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="mu.tl.leiden"/>
+                    <has_text_matching expression="'rna': 2.0"/>
+                    <has_text_matching expression="'atac': 1.5"/>
+                    <has_text_matching expression="'rna': 3"/>
+                    <has_text_matching expression="'atac': 1"/>
+                    <has_text_matching expression="random_state=0"/>
+                    <has_text_matching expression="key_added='leiden'"/>
+                    <has_text_matching expression="neighbors_key='neighbors'"/>
+                    <has_text_matching expression="directed=True"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text_matching expression="179 × 490"/>
+                <has_text_matching expression="179 x 178"/>
+                <has_text_matching expression="179 x 312"/>
+            </assert_stdout>
+            <output name="mudata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="mod/rna"/>
+                    <has_h5_keys keys="mod/atac"/>
+                    <has_h5_keys keys="obs/leiden"/>
+                    <has_h5_keys keys="uns/leiden"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- test3: tl.louvain -->
+            <param name="mdata" location="https://zenodo.org/records/12570984/files/pbmc3k_chr21_processed.h5mu"/>
+            <param name="method" value="tl.louvain"/>
+            <conditional name="res">
+                <param name="type" value="separate"/>
+                <repeat name="modalities">
+                    <param name="mod_name" value="rna"/>
+                    <param name="resolution" value="2.0"/>
+                </repeat>
+                <repeat name="modalities">
+                    <param name="mod_name" value="atac"/>
+                    <param name="resolution" value="1.5"/>
+                </repeat>
+            </conditional>
+            <conditional name="weights">
+                <param name="type" value="separate"/>
+                <repeat name="modalities">
+                    <param name="mod_name" value="rna"/>
+                    <param name="mod_weights" value="3"/>
+                </repeat>
+                <repeat name="modalities">
+                    <param name="mod_name" value="atac"/>
+                    <param name="mod_weights" value="1"/>
+                </repeat>
+            </conditional>
+            <param name="random_state" value="0"/>
+            <param name="key_added" value="louvain"/>
+            <param name="neighbors_key" value="neighbors"/>
+            <param name="directed" value="True"/>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="mu.tl.louvain"/>
+                    <has_text_matching expression="'rna': 2.0"/>
+                    <has_text_matching expression="'atac': 1.5"/>
+                    <has_text_matching expression="'rna': 3"/>
+                    <has_text_matching expression="'atac': 1"/>
+                    <has_text_matching expression="random_state=0"/>
+                    <has_text_matching expression="key_added='louvain'"/>
+                    <has_text_matching expression="neighbors_key='neighbors'"/>
+                    <has_text_matching expression="directed=True"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text_matching expression="179 × 490"/>
+                <has_text_matching expression="179 x 178"/>
+                <has_text_matching expression="179 x 312"/>
+            </assert_stdout>
+            <output name="mudata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="mod/rna"/>
+                    <has_h5_keys keys="mod/atac"/>
+                    <has_h5_keys keys="obs/louvain"/>
+                    <has_h5_keys keys="uns/louvain"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- test4: tl.mofa -->
+            <param name="mdata" location="https://zenodo.org/records/12570984/files/pbmc3k_chr21_processed.h5mu"/>
+            <param name="method" value="tl.mofa"/>
+            <param name="groups_label" value=""/>
+            <param name="use_raw" value="False"/>
+            <param name="use_var" value="highly_variable"/>
+            <param name="use_obs" value="union"/>
+            <param name="n_factors" value="10"/>
+            <param name="n_iterations" value="10"/>
+            <param name="convergence_mode" value="fast"/>
+            <conditional name="svi">
+                <param name="svi_mode" value="yes"/>
+                <param name="svi_batch_size" value="0.5"/>
+                <param name="svi_learning_rate" value="1.0"/>
+                <param name="svi_forgetting_rate" value="0.5"/>
+                <param name="svi_start_stochastic" value="1"/>
+            </conditional>
+            <param name="seed" value="1"/>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="mu.tl.mofa"/>
+                    <has_text_matching expression="use_raw=False"/>
+                    <has_text_matching expression="use_var='highly_variable'"/>
+                    <has_text_matching expression="use_obs='union'"/>
+                    <has_text_matching expression="n_factors=10"/>
+                    <has_text_matching expression="n_iterations=10"/>
+                    <has_text_matching expression="convergence_mode='fast'"/>
+                    <has_text_matching expression="svi_batch_size=0.5"/>
+                    <has_text_matching expression="svi_learning_rate=1.0"/>
+                    <has_text_matching expression="svi_forgetting_rate=0.5"/>
+                    <has_text_matching expression="svi_start_stochastic=1"/>
+                    <has_text_matching expression="seed=1"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text_matching expression="179 × 490"/>
+                <has_text_matching expression="179 x 178"/>
+                <has_text_matching expression="179 x 312"/>
+            </assert_stdout>
+            <output name="mudata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="mod/rna"/>
+                    <has_h5_keys keys="mod/atac"/>
+                    <has_h5_keys keys="uns/mofa"/>
+                    <has_h5_keys keys="obsm/X_mofa"/>
+                    <has_h5_keys keys="varm/LFs"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- test5: tl.umap -->
+            <param name="mdata" location="https://zenodo.org/records/12570984/files/pp.neighbors.h5mu"/>
+            <param name="method" value="tl.umap"/>
+            <param name="min_dist" value="0.5"/>
+            <param name="spread" value="1.0"/>
+            <param name="n_components" value="2"/>
+            <param name="maxiter" value="10"/>
+            <param name="alpha" value="1.0"/>
+            <param name="gamma" value="1.0"/>
+            <param name="negative_sample_rate" value="5"/>
+            <param name="init_pos" value="spectral"/>
+            <param name="random_state" value="42"/>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="mu.tl.umap"/>
+                    <has_text_matching expression="min_dist=0.5"/>
+                    <has_text_matching expression="spread=1.0"/>
+                    <has_text_matching expression="n_components=2"/>
+                    <has_text_matching expression="maxiter=10"/>
+                    <has_text_matching expression="alpha=1.0"/>
+                    <has_text_matching expression="gamma=1.0"/>
+                    <has_text_matching expression="negative_sample_rate=5"/>
+                    <has_text_matching expression="init_pos='spectral'"/>
+                    <has_text_matching expression="random_state=42"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text_matching expression="2711 × 1781"/>
+                <has_text_matching expression="2711 x 555"/>
+                <has_text_matching expression="2711 x 1226"/>
+            </assert_stdout>
+            <output name="mudata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="mod/rna"/>
+                    <has_h5_keys keys="mod/atac"/>
+                    <has_h5_keys keys="uns/umap"/>
+                    <has_h5_keys keys="obsm/X_umap"/>
+                    <has_h5_keys keys="obsp/connectivities"/>
+                    <has_h5_keys keys="obsp/distances"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Cluster: Cluster cells using the Leiden algorithm (`muon.tl.leiden`)
+====================================================================
+
+        Cluster cells using the Leiden algorithm. This runs only the multiplex Leiden algorithm on the MuData object
+        using connectivities of individual modalities.
+
+        More details on the `muon documentation
+        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.leiden.html#muon.tl.leiden>`__
+
+Cluster: Cluster cells using the Louvain algorithm ('muon.tl.louvain')
+======================================================================
+
+        Cluster cells using the Louvain algorithm. This runs only the multiplex Louvain algorithm on the MuData object
+        using connectivities of individual modalities
+
+        More details on the `muon documentation
+        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.louvain.html#muon.tl.louvain>`__
+
+Analyze: Run Multi Omics Factor Analysis ('muon.tl.mofa')
+=========================================================
+
+        Run Multi-Omics Factor Analysis
+
+        More details on the `muon documentation
+        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.mofa.html#muon.tl.mofa>`__
+
+Analyze: Similarity Network Fusion ('muon.tl.snf')
+==================================================
+
+        Similarity network fusion (SNF). See Wang et al., 2014 (DOI: 10.1038/nmeth.2810).
+
+        More details on the `muon documentation
+        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.snf.html#muon.tl.snf>`__
+
+Embed: Embed the multimodal neighborhood graph using UMAP ('muon.tl.umap')
+==========================================================================
+
+        Embed the multimodal neighborhood graph using UMAP (McInnes et al, 2018). UMAP (Uniform Manifold Approximation
+        and Projection) is a manifold learning technique suitable for visualizing high-dimensional data.
+
+        More details on the `muon documentation
+        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.umap.html#muon.tl.umap>`__
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file