Mercurial > repos > iuc > preprocess_muon
diff preprocess_muon.xml @ 0:678260997e94 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/muon/ commit bcf2ec32c3d13b29da55e0e638da7ddd7162c436
author | iuc |
---|---|
date | Wed, 05 Feb 2025 10:53:25 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/preprocess_muon.xml Wed Feb 05 10:53:25 2025 +0000 @@ -0,0 +1,322 @@ +<tool id="preprocess_muon" name="muon filter and normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="version_command"/> + <command detect_errors="exit_code"><![CDATA[ +@COPY_MUDATA@ +@CMD@ + ]]></command> + <configfiles> + <configfile name="script_file"><![CDATA[ +@CMD_imports@ +@CMD_read_inputs@ + +#if $method.method == 'pp.intersect_obs' +mu.pp.intersect_obs( + mdata +) +## Update the features. Only union features of the modalities are considered +mdata.update_var() + + +#else if $method.method == 'pp.l2norm' +mu.pp.l2norm( + mdata, + #if $method.mod + mod='$method.mod', + #end if + #if $method.rep + rep='$method.rep', + #end if + n_pcs=$method.n_pcs, + copy=False +) + +#else if $method.method == 'pp.neighbors' +mu.pp.neighbors( + mdata, + #if str($method.n_neighbors) + n_neighbors=$method.n_neighbors, + #end if + n_bandwidth_neighbors=$method.n_bandwidth_neighbors, + n_multineighbors=$method.n_multineighbors, + @CMD_neighbor_keys@ + metric='$method.metric', + #if $method.key_added + key_added='$method.key_added', + #end if + weight_key='$method.weight_key', + add_weights_to_modalities=$method.add_weights_to_modalities, + eps=$method.eps, + random_state=$method.random_state, + copy=False +) + +#else if $method.method == 'pp.sample_obs' +mu.pp.sample_obs( + mdata, + frac=$method.frac, + #if $method.groupby + groupby='$method.groupby', + #end if + #if str($method.min_n) + min_n=$method.min_n + #end if +) +#end if + +@CMD_mudata_write_outputs@ +]]></configfile> + </configfiles> + <inputs> + <expand macro="inputs_mudata"/> + <conditional name="method"> + <param name="method" type="select" label="Method used for plotting"> + <option value="pp.intersect_obs">Subset: Subset observations present only in all modalities, using 'muon.pp.intersect_obs'</option> + <option value="pp.l2norm">Normalize: Normalize observations to unit L2 norm, using 'muon.pp.l2norm'</option> + <option value="pp.neighbors">Search: Multimodal nearest neighbor search, using 'muon.pp.neighbors'</option> + <option value="pp.sample_obs">Return an object with some of the observations (subsampling). 'muon.pp.sample_obs'</option> + </param> + <when value="pp.intersect_obs"/> + <when value="pp.l2norm"> + <param argument="mod" type="text" optional="true" label="Names of the modalities to normalize" help="Leave empty to use all modalities"> + <expand macro="sanitize_string" /> + </param> + <param argument="rep" type="text" optional="true" label="The representation to normalize." help="X or any key for .obsm is valid, for all modalities"> + <expand macro="sanitize_query" /> + </param> + <param argument="n_pcs" type="integer" min="0" value="0" label="The number of principal components to use." help="This affects the result only if a PCA representation is being normalized"/> + </when> + <when value="pp.neighbors"> + <param argument="n_neighbors" type="integer" optional="true" label="Number of nearest neighbors to find" + help="If not set, arithmetic mean of per-modality neighbors will be used"/> + <param argument="n_bandwidth_neighbors" type="integer" value="20" label="Number of nearest neighbors to use for bandwidth selection"/> + <param argument="n_multineighbors" type="integer" value="200" label="Number of nearest neighbors in each modality to consider as candidates for multimodal nearest neighbors" + help="Only points in the union of per-modality nearest neighbors are candidates for multimodal nearest neighbors. + This will use the same metric that was used for the nearest neighbor search in the respective modality."/> + <expand macro="param_neighbor_keys"/> + <param argument="metric" type="select" label="Distance measure to use" help="This will only be used in the final step to search for nearest neighbors in the set of candidates."> + <option value="euclidean" selected="true">euclidean</option> + <option value="braycurtis">braycurtis</option> + <option value="canberra">canberra</option> + <option value="chebyshev">chebyshev</option> + <option value="cityblock">cityblock</option> + <option value="correlation">correlation</option> + <option value="cosine">cosine</option> + <option value="dice">dice</option> + <option value="hamming">hamming</option> + <option value="jaccard">jaccard</option> + <option value="jensenshannon">jensenshannon</option> + <option value="kulsinski">kulsinski</option> + <option value="mahalanobis">mahalanobis</option> + <option value="matching">matching</option> + <option value="minkowski">minkowski</option> + <option value="rogerstanimoto">rogerstanimoto</option> + <option value="russellrao">russellrao</option> + <option value="seuclidean">seuclidean</option> + <option value="sokalmichener">sokalmichener</option> + <option value="sokalsneath">sokalsneath</option> + <option value="sqeuclidean">sqeuclidean</option> + <option value="wminkowski">wminkowski</option> + <option value="yule">yule</option> + </param> + <expand macro="param_key_added_common"/> + <param argument="weight_key" type="text" value="mod_weight" label="Weight key to add to each modality’s .obs or to mdata.obs"> + <expand macro="sanitize_query" /> + </param> + <param argument="add_weights_to_modalities" type="boolean" truevalue="True" falsevalue="False" checked="false" label="If to add weights to individual modalities" help="By default, the weights will be added to mdata.obs"/> + <expand macro="param_eps" eps_value="0.0001"/> + <expand macro="param_random_state" seed="42"/> + </when> + <when value="pp.sample_obs"> + <param argument="frac" type="float" min="0" max="1" value="0.1" label="A fraction of observations to return"/> + <param argument="groupby" type="text" optional="true" label="Categorical column in .obs that is used for prior grouping before sampling observations"> + <expand macro="sanitize_query"/> + </param> + <param argument="min_n" type="integer" min="0" optional="true" label="Return min_n observations if fraction frac of observations is below min_n" + help="When groupby is not None, min_n is applied per group."/> + </when> + </conditional> + <expand macro="inputs_common_advanced" /> + </inputs> + <outputs> + <expand macro="muon_outputs"/> + </outputs> + <tests> + <test expect_num_outputs="2"> + <!-- test1: intersect_obs --> + <param name="mdata" ftype="h5ad" location="https://zenodo.org/records/12570984/files/pbmc3k_chr21.h5mu"/> + <param name="method" value="pp.intersect_obs"/> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="mu.pp.intersect_obs"/> + </assert_contents> + </output> + <assert_stdout> + <has_text_matching expression="179 × 490"/> + <has_text_matching expression="179 x 178"/> + <has_text_matching expression="179 x 312"/> + </assert_stdout> + <output name="mudata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="mod/rna"/> + <has_h5_keys keys="mod/atac"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <!-- test2: l2norm --> + <param name="mdata" ftype="h5ad" location="https://zenodo.org/records/12570984/files/pbmc3k_chr21.h5mu"/> + <param name="method" value="pp.l2norm"/> + <param name="n_pcs" value="5"/> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="mu.pp.l2norm"/> + <has_text_matching expression="copy=False"/> + <has_text_matching expression="n_pcs=5"/> + </assert_contents> + </output> + <assert_stdout> + <has_text_matching expression="179 × 490"/> + <has_text_matching expression="179 x 178"/> + <has_text_matching expression="179 x 312"/> + </assert_stdout> + <output name="mudata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="mod/rna"/> + <has_h5_keys keys="mod/atac"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <!-- test3: neighbors --> + <param name="mdata" ftype="h5ad" location="https://zenodo.org/records/12570984/files/pbmc3k_chr21_pp.neighbors_in.h5mu"/> + <param name="method" value="pp.neighbors"/> + <param name="n_neighbors" value="2"/> + <param name="n_bandwidth_neighbors" value="3"/> + <param name="n_multineighbors" value="5"/> + <conditional name="n_keys"> + <param name="type" value="separate"/> + <repeat name="modalities"> + <param name="mod_name" value="rna"/> + <param name="neighbor_keys" value="neighbors"/> + </repeat> + <repeat name="modalities"> + <param name="mod_name" value="atac"/> + <param name="neighbor_keys" value="neighbors"/> + </repeat> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="mu.pp.neighbors"/> + <has_text_matching expression="copy=False"/> + <has_text_matching expression="n_neighbors=2"/> + <has_text_matching expression="n_bandwidth_neighbors=3"/> + <has_text_matching expression="n_multineighbors=5"/> + <has_text_matching expression="metric='euclidean'"/> + <has_text_matching expression="weight_key='mod_weight'"/> + <has_text_matching expression="add_weights_to_modalities=False"/> + <has_text_matching expression="eps=0.0001"/> + <has_text_matching expression="random_state=42"/> + </assert_contents> + </output> + <assert_stdout> + <has_text_matching expression="2711 × 1781"/> + <has_text_matching expression="2711 x 555"/> + <has_text_matching expression="2711 x 1226"/> + </assert_stdout> + <output name="mudata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="mod/rna"/> + <has_h5_keys keys="mod/atac"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <!-- test4: sample_obs --> + <param name="mdata" ftype="h5ad" location="https://zenodo.org/records/12570984/files/pbmc3k_chr21.h5mu"/> + <param name="method" value="pp.sample_obs"/> + <param name="frac" value="0.5"/> + <param name="min_n" value="10"/> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="mu.pp.sample_obs"/> + <has_text_matching expression="frac=0.5"/> + <has_text_matching expression="min_n=10"/> + </assert_contents> + </output> + <assert_stdout> + <has_text_matching expression="179 × 490"/> + <has_text_matching expression="179 x 178"/> + <has_text_matching expression="179 x 312"/> + </assert_stdout> + <output name="mudata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="mod/rna"/> + <has_h5_keys keys="mod/atac"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +Filter: Filter observations (samples or cells) in-place using any column in .obs or in .X ('muon.pp.filter_obs') +================================================================================================================ + + Filter observations (samples or cells) in-place using any column in .obs or in .X. + + More details on the `muon documentation + <https://muon.readthedocs.io/en/latest/api/generated/muon.pp.filter_obs.html#muon.pp.filter_obs>`__ + +Filter: Filter variables (features) ('muon.pp.filter_var') +========================================================== + + Filter variables (features, e.g. genes) in-place using any column in .var or row in .X. + + More details on the `muon documentation + <https://muon.readthedocs.io/en/latest/api/generated/muon.pp.filter_var.html#muon.pp.filter_var>`__ + +Subset: Subset observations (samples or cells) in-place taking observations present only in all modalities ('muon.pp.intersect_obs') +==================================================================================================================================== + + Subset observations (samples or cells) in-place taking observations present only in all modalities. + + More details on the `muon documentation + <https://muon.readthedocs.io/en/latest/api/generated/muon.pp.intersect_obs.html#muon.pp.intersect_obs>`__ + +Normalize: Normalize observations to unit L2 norm ('muon.pp.l2norm') +==================================================================== + + Normalize observations to unit L2 norm. + + More details on the `muon documentation + <https://muon.readthedocs.io/en/latest/api/generated/muon.pp.l2norm.html#muon.pp.l2norm>`__ + +Search: Multimodal nearest neighbor search ('muon.pp.neighbors') +================================================================ + + Multimodal nearest neighbor search by implementing the multimodal nearest neighbor method of Hao et al. and Swanson et al. + The neighbor search efficiency on this heavily relies on UMAP. In particular, you may want to decrease + n_multineighbors for large data set to avoid excessive peak memory use. To achieve results as close as possible + to the Seurat implementation, observations must be normalized to unit L2 norm prior to running per-modality + nearest-neighbor search. + + More details on the `muon documentation + <https://muon.readthedocs.io/en/latest/api/generated/muon.pp.neighbors.html#muon.pp.neighbors>`__ + + ]]></help> + <expand macro="citations"/> +</tool> \ No newline at end of file