Mercurial > repos > iuc > episcanpy_cluster_embed
diff cluster_embed.xml @ 0:7340d619d7f8 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/episcanpy/ commit ce8ee43d7285503a24c7b0f55c09c513be8c66f5
| author | iuc |
|---|---|
| date | Tue, 18 Apr 2023 13:19:32 +0000 |
| parents | |
| children | f4713992f23a |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cluster_embed.xml Tue Apr 18 13:19:32 2023 +0000 @@ -0,0 +1,450 @@ +<tool id="episcanpy_cluster_embed" name="Cluster, embed and annotate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>with EpiScanpy</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="bio_tools"/> + <expand macro="requirements"/> + <expand macro="version_command"/> + <command detect_errors="exit_code"><![CDATA[ +@CMD@ + ]]></command> + <configfiles> + <configfile name="script_file"><![CDATA[ +@CMD_imports@ +@CMD_read_inputs@ +import episcanpy as esc +#if $method.method == 'pp.lazy' +esc.pp.lazy( + adata, + pp_pca=$method.lazy_pp_pca, + svd_solver='$method.lazy_svd_solver', + nb_pcs=$method.lazy_nb_pcs, + n_neighbors=$method.lazy_n_neighbors, + perplexity=$method.lazy_perplexity, + method='$method.lazy_method', + metric='$method.lazy_metric', + min_dist=$method.lazy_min_dist, + spread=$method.lazy_spread, + use_highly_variable=$method.lazy_use_highly_variable, + n_components=$method.lazy_n_components, + ) + +#else if $method.method == 'tl.rank_features' +esc.tl.rank_features( + adata, + omic='ATAC', + groupby='$method.rank_features_groupby', + use_raw=$method.rank_features_use_raw, + groups='$method.rank_features_groups', + reference='$method.rank_features_reference', + n_features=$method.rank_features_n_features, + rankby_abs=$method.rank_features_rankby_abs, + key_added='$method.rank_features_key_added', + copy=False, + method='$method.rank_features_method', + corr_method='$method.rank_features_corr_method' + ) + +#else if $method.method == 'tl.find_genes' +esc.tl.find_genes( + adata, + gtf_file='$method.find_genes_gtf_file', + key_added='$method.find_genes_key_added', + upstream=$method.find_genes_upstream, + feature_type='$method.find_genes_feature_type', + annotation='$method.find_genes_annotation', + raw=$method.find_genes_raw) + +#else if $method.method == 'tl.get_n_clusters' +esc.tl.getNClusters( + adata, + n_cluster=$method.get_n_clusters_n_cluster, + range_min=$method.get_n_clusters_range_min, + range_max=$method.get_n_clusters_range_max, + max_steps=$method.get_n_clusters_max_steps, + method='$method.get_n_clusters_method', + key_added='$method.get_n_clusters_key_added' + ) + +#else if $method.method == 'tl.kmeans' +esc.tl.kmeans( + adata, + num_clusters=$method.kmeans_num_clusters + ) + +#else if $method.method == 'tl.hc' +esc.tl.hc( + adata, + num_clusters=$method.hc_num_clusters + ) +#end if +adata.write('anndata.h5ad') +]]></configfile> + </configfiles> + <inputs> + <expand macro="inputs_anndata"/> + <conditional name="method"> + <param argument="method" type="select" label="Method used for Clustering or Embedding"> + <option value="pp.lazy">Embedding: Automatically compute PCA coordinates, loadings and variance decomposition, a neighborhood graph of observations, t-distributed stochastic neighborhood embedding (tSNE) Uniform Manifold Approximation and Projection (UMAP), using 'pp.lazy'</option> + <option value="tl.rank_features">Rank features for characterizing groups, using 'tl.rank_features'</option> + <option value="tl.find_genes">Embedding: Find genes and add annotations, using 'pp.find_genes'</option> + <option value="tl.get_n_clusters">Clustering: Test different settings of louvain to obtain the target number of clusters, using 'tl.getNClusters'</option> + <option value="tl.kmeans">Clustering: Compute kmeans clustering using X_pca fits, using 'tl.kmeans'</option> + <option value="tl.hc">Clustering: Compute hierarchical clustering using X_pca fits, using 'tl.hc'</option> + </param> + <when value="pp.lazy"> + <param name="lazy_pp_pca" value="True" type="select" label="Compute PCA coordinates before the neighborhood graph" help="(pp_pca)"> + <option value="True" selected="true">True</option> + <option value="False">False</option> + </param> + <param name="lazy_svd_solver" type="select" label="SVD solver to use" help="(svd_solver)"> + <option value="arpack" selected="true">arpack (for the ARPACK wrapper in SciPy)</option> + <option value="randomized">randomized (for the randomized algorithm due to Halko (2009)</option> + <option value="auto">auto (chooses automatically depending on the size of the problem)</option> + <option value="lobpcg">lobpcg (an alternative SciPy solver)</option> + </param> + <param name="lazy_nb_pcs" value="50" min="0" type="integer" label="Number of principal component computed for PCA (and therefore neighbors, tsne and umap)" help="(nb_pcs)"/> + <param name="lazy_n_neighbors" value="15" min="0" type="integer" label="Size of the local neighborhood (number of neighboring data points) used for manifold approximation" help="(n_neighbors)"/> + <param name="lazy_perplexity" value="30" min="0" type="integer" label="Perplexity (number of nearest neighbors used in other manifold learning algorithms)" help="(perplexity)"/> + <param name="lazy_method" type="select" label="Kernel to use for computing connectivities" help="(method)"> + <option value="umap" selected="true">umap</option> + <option value="gauss">gauss</option> + </param> + <param name="lazy_metric" type="select" label="Metric that returns a distance" help="(metric)"> + <option value="euclidean" selected="true">euclidean</option> + <option value="cityblock">cityblock</option> + <option value="cosine">cosine</option> + <option value="l1">l1</option> + <option value="l2">l2</option> + <option value="manhattan">manhattan</option> + <option value="braycurtis">braycurtis</option> + <option value="canberra">canberra</option> + <option value="chebyshev">chebyshev</option> + <option value="correlation">correlation</option> + <option value="dice">dice</option> + <option value="hamming">hamming</option> + <option value="jaccard">jaccard</option> + <option value="kulsinski">kulsinski</option> + <option value="mahalanobis">mahalanobis</option> + <option value="minkowski">minkowski</option> + <option value="rogerstanimoto">rogerstanimoto</option> + <option value="russelrao">russelrao</option> + <option value="seuclidean">seuclidean</option> + <option value="sokalmichener">sokalmichener</option> + <option value="sokalsneath">sokalsneath</option> + <option value="sqeuclidean">sqeuclidean</option> + <option value="yule">yule</option> + </param> + <param name="lazy_min_dist" value="0.5" min="0" type="float" label="The effective minimum distance between embedded points" help="(min_dist)"/> + <param name="lazy_spread" value="1.0" type="float" label="The effective scale of embedded points" help="(spread)"/> + <param name="lazy_use_highly_variable" type="select" label="Use highly variable genes only" help="(use_highly_variable)"> + <option value="True">True</option> + <option value="False" selected="true">False</option> + </param> + <param name="lazy_n_components" value="2" min="0" type="integer" label="The number of dimensions of the UMAP embedding" help="(n_components)"/> + </when> + <when value="tl.rank_features"> + <param name="rank_features_groupby" value="louvain" type="text" label="The key of the observations grouping to consider" help="(groupby)"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + <add value="-" /> + <add value=" " /> + <add value="." /> + </valid> + </sanitizer> + <validator type="regex">[0-9a-zA-Z_. -]+</validator> + </param> + <param name="rank_features_use_raw" type="select" label="Use raw attribute of Anndata if present" help="(use_raw)"> + <option value="True" selected="true">True</option> + <option value="False">False</option> + </param> + <param name="rank_features_groups" value="all" type="text" label="Subset of groups, to which comparison shall be restricted" help="(groups)"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + <add value="-" /> + <add value="." /> + <add value=" " /> + <add value="," /> + </valid> + </sanitizer> + <validator type="regex">[0-9a-zA-Z_., -]+</validator> + </param> + <param name="rank_features_reference" value="rest" type="text" label="Compare each group with respect to this group" help="(reference)"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + <add value="-" /> + <add value="." /> + <add value=" " /> + <add value="," /> + </valid> + </sanitizer> + <validator type="regex">[0-9a-zA-Z_., -]+</validator> + </param> + <param name="rank_features_n_features" value="100" min="1" type="integer" label="The number of features that appear in the returned tables" help="(n_features)"/> + <param name="rank_features_rankby_abs" type="select" label="Rank genes by the absolute value of the score, not by the score" help="(rankby_abs)"> + <option value="True" >True</option> + <option value="False" selected="true">False</option> + </param> + <param name="rank_features_key_added" value="rank_features_groups" type="text" label="The key in adata.uns information is saved to" help="(key_added)"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + <add value="-" /> + <add value="." /> + <add value=" " /> + <add value="," /> + </valid> + </sanitizer> + <validator type="regex">[0-9a-zA-Z_., -]+</validator> + </param> + <param name="rank_features_method" type="select" label="Method to use" help="(method)"> + <option value="" selected="true">Auto select for ATAC</option> + <option value="logreg">Logistic regression</option> + <option value="t-test" >t-test</option> + <option value="t-test_overestim_var">t-test_overestim_var</option> + <option value="wilcoxon">Wilcoxon rank sum</option> + </param> + <param name="rank_features_corr_method" value="benjamini-hochberg" type="select" label="p-value correction method" help="(corr_method)"> + <option value="benjamini-hochberg">Benjamini Hochberg</option> + <option value="bonferroni">Bonferroni</option> + </param> + </when> + <when value="tl.find_genes"> + <param name="find_genes_gtf_file" type="data" format="gtf" label="Annotation GTF file" help="(gtf_file)"/> + <param name="find_genes_key_added" value="transcript_annotation" type="text" label="Key added" help="(key_added)"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + <add value="-" /> + <add value="." /> + <add value=" " /> + <add value="," /> + </valid> + </sanitizer> + <validator type="regex">[0-9a-zA-Z_., -]+</validator> + </param> + <param name="find_genes_upstream" value="2000" min="0" type="integer" label="Upstream" help="(upstream)"/> + <param name="find_genes_feature_type" value="transcript" type="text" label="Feature type" help="(feature_type)"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + <add value="-" /> + <add value="." /> + <add value=" " /> + <add value="," /> + </valid> + </sanitizer> + <validator type="regex">[0-9a-zA-Z_., -]+</validator> + </param> + <param name="find_genes_annotation" value="HAVANA" type="text" label="Annotation" help="(annotation)"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + <add value="-" /> + <add value="." /> + <add value=" " /> + <add value="," /> + </valid> + </sanitizer> + <validator type="regex">[0-9a-zA-Z_., -]+</validator> + </param> + <param name="find_genes_raw" type="select" label="Raw?" help="(raw)"> + <option value="True">True</option> + <option value="False" selected="true">False</option> + </param> + </when> + <when value="tl.get_n_clusters"> + <param name="get_n_clusters_n_cluster" value="14" min="1" type="integer" label="Number of clusters" help="(n_cluster)"/> + <param name="get_n_clusters_method" type="select" label="Clustering method to use" help="(method)"> + <option value="leiden" selected="true">leiden</option> + <option value="louvain">louvain</option> + </param> + <param name="get_n_clusters_range_min" value="0" min="0" type="integer" label="Range minimum" help="(range_min)"/> + <param name="get_n_clusters_range_max" value="3" min="1" type="integer" label="Range maximum" help="(range_max)"/> + <param name="get_n_clusters_max_steps" value="20" min="1" type="integer" label="Maximum number of steps" help="(max_steps)"/> + <param name="get_n_clusters_key_added" value="None" type="text" label="Variable name in obs" help="(key_added)"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + <add value="-" /> + <add value="." /> + <add value=" " /> + <add value="," /> + </valid> + </sanitizer> + <validator type="regex">[0-9a-zA-Z_., -]+</validator> + </param> + </when> + <when value="tl.kmeans"> + <param name="kmeans_num_clusters" value="14" min="1" type="integer" label="Number of clusters" help="(num_clusters)"/> + </when> + <when value="tl.hc"> + <param name="hc_num_clusters" value="14" min="1" type="integer" label="Number of clusters" help="(num_clusters)"/> + </when> + </conditional> + <expand macro="inputs_common_advanced"/> + </inputs> + <outputs> + <expand macro="anndata_outputs"/> + </outputs> + <tests> + <test expect_num_outputs="2"> + <!-- test 0- pp.lazy --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="pp.lazy"/> + <param name="lazy_pp_pca" value="True"/> + <param name="lazy_svd_solver" value="arpack"/> + <param name="lazy_nb_pcs" value="5"/> + <param name="lazy_pp_n_neighbors" value="15"/> + <param name="lazy_pp_perplexity" value="30"/> + <param name="lazy_method" value="umap"/> + <param name="lazy_metric" value="euclidean"/> + <param name="lazy_min_dist" value="0.5"/> + <param name="lazy_spread" value="1.0"/> + <param name="lazy_use_highly_variable" value="False"/> + <param name="lazy_n_components" value="2"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="esc.pp.lazy"/> + <has_text_matching expression="adata"/> + <has_text_matching expression="nb_pcs=5"/> + </assert_contents> + </output> + <output name="anndata_out" file="krumsiek11.pp.lazy.h5ad" ftype="h5ad" compare="sim_size"/> + </test> + <test expect_num_outputs="2"> + <!-- test 1- tl.rank_features --> + <param name="adata" value="krumsiek11.pp.lazy.tl.louvain.h5ad" /> + <conditional name="method"> + <param name="method" value="tl.rank_features"/> + <param name="rank_features_groupby" value="louvain"/> + <param name="rank_features_use_raw" value="False"/> + <param name="rank_features_groups" value="all"/> + <param name="rank_features_reference" value="rest"/> + <param name="rank_features_n_features" value="100"/> + <param name="rank_features_rankby_abs" value="False"/> + <param name="rank_features_key_added" value="rank_features_groups"/> + <param name="rank_features_method" value=""/> + <param name="rank_features_corr_method" value="benjamini-hochberg"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="anndata_out" file="krumsiek11.tl.rank_features.h5ad" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs, uns, obsm, varm, obsp" /> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <!-- test 2-tl.find_genes --> + <param name="adata" value="chrY.h5ad" /> + <conditional name="method"> + <param name="method" value="tl.find_genes"/> + <param name="find_genes_gtf_file" value="chrY.gtf"/> + <param name="find_genes_key_added" value="transcript_annotation"/> + <param name="find_genes_upstream" value="2000"/> + <param name="find_genes_feature_type" value="transcript"/> + <param name="find_genes_annotation" value="HAVANA"/> + <param name="find_genes_raw" value="False"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="anndata_out" file="chrY_with_transcript_annotation.h5ad" ftype="h5ad" compare="sim_size"> + <assert_contents> + <has_h5_keys keys="var" /> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <!-- test 3-tl.get_n_clusters --> + <param name="adata" value="krumsiek11.pp.lazy.tl.louvain.h5ad" /> + <conditional name="method"> + <param name="method" value="tl.get_n_clusters"/> + <param name="get_n_clusters_n_clusters" value="3"/> + <param name="get_n_clusters_method" value="louvain"/> + <param name="get_n_clusters_range_min" value="0"/> + <param name="get_n_clusters_range_max" value="3"/> + <param name="get_n_clusters_max_steps" value="20"/> + <conditional name="get_n_clusters_obs_key"> + <param name="get_n_clusters_key_added" value="None"/> + </conditional> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="anndata_out" file="krumsiek11.tl.get_n_clusters.h5ad" ftype="h5ad"/> + </test> + <test expect_num_outputs="2"> + <!-- test 4-tl.kmeans --> + <param name="adata" value="krumsiek11.pp.lazy.h5ad" /> + <conditional name="method"> + <param name="method" value="tl.kmeans"/> + <param name="kmeans_num_clusters" value="14"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="anndata_out" file="krumsiek11.tl.kmeans.h5ad" ftype="h5ad"/> + </test> + <test expect_num_outputs="2"> + <!-- test 5-tl.hc --> + <param name="adata" value="krumsiek11.pp.lazy.h5ad" /> + <conditional name="method"> + <param name="method" value="tl.hc"/> + <param name="hc_num_clusters" value="14"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="anndata_out" file="krumsiek11.tl.hc.h5ad" ftype="h5ad"/> + </test> + </tests> + <help><![CDATA[ + +Automatically compute PCA coordinates (`pp.lazy`) +======================================================================================== +This function automatically computes PCA coordinates, loadings and variance decomposition, a neighborhood graph of +observations, t-distributed stochastic neighborhood embedding (tSNE) Uniform Manifold Approximation and Projection (UMAP). + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.lazy.html>`__ + +Find and add gene annotations (`tl.find_genes`) +======================================================================================== +This function adds a gene annotation to an AnnData (h5ad) file from annotations file (.annotation.gtf). + +Automatically obtain target number of clusters (`tl.getNClusters`) +======================================================================================== +This function will test different settings of louvain to obtain the target number of clusters. + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.tl.getNClusters.html>`__ + +Perform kmeans clustering (`tl.kmeans`) +======================================================================================== +This function will perform kmeans clustering using X_pca fits. + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.tl.kmeans.html>`__ + +Compute hierarchical clustering using X_pca fits (`tl.hc`) +=================================================================== + +This function computes heirarchical clustering using X_pca fits using random_state=2019. + +More details on the `episcanpy documentation +<https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.tl.hc.html>`__ + ]]></help> + <expand macro="citations"/> +</tool>
