comparison cluster_reduce_dimension.xml @ 1:20cfb9f3dded draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author iuc
date Wed, 16 Oct 2019 06:29:43 -0400
parents 0e212e42ef88
children dc2de14881ba
comparison
equal deleted inserted replaced
0:0e212e42ef88 1:20cfb9f3dded
1 <tool id="scanpy_cluster_reduce_dimension" name="Cluster and reduce dimension with scanpy" version="@galaxy_version@"> 1 <tool id="scanpy_cluster_reduce_dimension" name="Cluster," version="@galaxy_version@">
2 <description></description> 2 <description>infer trajectories and embed with scanpy</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 <xml name="pca_inputs"> 5 <xml name="pca_inputs">
6 <param name="n_comps" type="integer" min="0" value="50" label="Number of principal components to compute" help=""/> 6 <param argument="n_comps" type="integer" min="0" value="50" label="Number of principal components to compute" help=""/>
7 <param name="dtype" type="text" value="float32" label="Numpy data type string to which to convert the result" help=""/> 7 <param argument="dtype" type="text" value="float32" label="Numpy data type string to which to convert the result" help=""/>
8 <conditional name="pca"> 8 <conditional name="pca">
9 <param name="chunked" type="select" label="Type of PCA?"> 9 <param argument="chunked" type="select" label="Type of PCA?">
10 <option value="True">Incremental PCA on segments (incremental PCA automatically zero centers and ignores settings of `random_seed` and `svd_solver`)</option> 10 <option value="True">Incremental PCA on segments (incremental PCA automatically zero centers and ignores settings of `random_seed` and `svd_solver`)</option>
11 <option value="False" selected="true">Full PCA</option> 11 <option value="False" selected="true">Full PCA</option>
12 </param> 12 </param>
13 <when value="True"> 13 <when value="True">
14 <param name="chunk_size" type="integer" min="0" value="" label="chunk_size" help="Number of observations to include in each chunk"/> 14 <param argument="chunk_size" type="integer" min="0" value="" label="chunk_size" help="Number of observations to include in each chunk"/>
15 </when> 15 </when>
16 <when value="False"> 16 <when value="False">
17 <param name="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" 17 <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
18 label="Compute standard PCA from covariance matrix?" 18 label="Compute standard PCA from covariance matrix?"
19 help="If not, it omits zero-centering variables (uses *TruncatedSVD* from scikit-learn), which allows to handle sparse input efficiently."/> 19 help="If not, it omits zero-centering variables (uses *TruncatedSVD* from scikit-learn), which allows to handle sparse input efficiently."/>
20 <expand macro="svd_solver"/> 20 <expand macro="svd_solver"/>
21 <param name="random_state" type="integer" value="0" label="Initial states for the optimization" help=""/> 21 <param argument="random_state" type="integer" value="0" label="Initial states for the optimization" help=""/>
22 </when> 22 </when>
23 </conditional> 23 </conditional>
24 <param argument="use_highly_variable" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use highly variable genes only?" help="They should be used if they have been determined beforehand."/>
24 </xml> 25 </xml>
25 <token name="@CMD_pca_outputs@"><![CDATA[ 26 <xml name="param_random_state">
26 np.savetxt('$X_pca', adata.obsm['X_pca'], delimiter='\t') 27 <param argument="random_state" type="integer" value="0" label="Random state" help="Change the initialization of the optimization."/>
27 np.savetxt('$PCs', adata.varm['PCs'], delimiter='\t') 28 </xml>
28 np.savetxt('$variance', adata.uns['pca']['variance'], delimiter='\t') 29 <xml name="param_use_weights">
29 np.savetxt('$variance_ratio', adata.uns['pca']['variance_ratio'], delimiter='\t') 30 <param argument="use_weights" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use weights from knn graph?"/>
31 </xml>
32 <token name="@CMD_pca_help@"><![CDATA[
33 The PCA is computed using the implementation of *scikit-learn*.
34
35 The returned AnnData object contains:
36
37 - PCA coordinates in the multi-dimensional observation annotation (obsm)
38 - Principal components containing the loadings in the multi-dimensional variable annotation (varm)
39 - The variance decomposition in the unstructured annotation (uns)
40 - Ratio of explained variance for PCA (variance)
41 - Explained variance, equivalent to the eigenvalues of the covariance matrix
42
43 This data is accessible using the inspect tool for AnnData
30 ]]></token> 44 ]]></token>
31 <token name="@CMD_pca_params@"><![CDATA[ 45 <token name="@CMD_pca_params@"><![CDATA[
32 data=adata, 46 data=adata,
33 n_comps=$method.n_comps, 47 n_comps=$method.n_comps,
34 dtype='$method.dtype', 48 dtype='$method.dtype',
35 copy=False, 49 copy=False,
36 chunked=$method.pca.chunked, 50 chunked=$method.pca.chunked,
37 #if $method.pca.chunked == 'True' 51 #if $method.pca.chunked == 'True'
38 chunk_size=$method.pca.chunk_size 52 chunk_size=$method.pca.chunk_size,
39 #else 53 #else
40 zero_center='$method.pca.zero_center', 54 zero_center=$method.pca.zero_center,
41 svd_solver='$method.pca.svd_solver', 55 svd_solver='$method.pca.svd_solver',
42 random_state=$method.pca.random_state 56 random_state=$method.pca.random_state,
43 #end if 57 #end if
58 use_highly_variable=$method.use_highly_variable
44 ]]></token> 59 ]]></token>
45 <xml name="penalty">
46 <param argument="penalty" type="select" label="Norm used in the penalization" help="">
47 <option value="l1">l1</option>
48 <option value="l2">l2</option>
49 <option value="customized">customized</option>
50 </param>
51 </xml>
52 <xml name="custom_penalty">
53 <param argument="pen" type="text" value="" label="Norm used in the penalization" help=""/>
54 </xml>
55 <xml name="fit_intercept">
56 <param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true"
57 label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""/>
58 </xml>
59 <xml name="random_state">
60 <param argument="random_state" type="integer" value="" optional="true"
61 label="The seed of the pseudo random number generator to use when shuffling the data" help=""/>
62 </xml>
63 <xml name="max_iter">
64 <param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge" help=""/>
65 </xml>
66 <xml name="multi_class">
67 <param argument="multi_class" type="select" label="Multi class" help="">
68 <option value="ovr">ovr: a binary problem is fit for each label</option>
69 <option value="multinomial">multinomial: the multinomial loss fit across the entire probability distribution, even when the data is binary</option>
70 <option value="auto">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option>
71 </param>
72 </xml>
73 </macros> 60 </macros>
74 <expand macro="requirements"/> 61 <expand macro="requirements"/>
75 <expand macro="version_command"/> 62 <expand macro="version_command"/>
76 <command detect_errors="exit_code"><![CDATA[ 63 <command detect_errors="exit_code"><![CDATA[
77 @CMD@ 64 @CMD@
88 #if $method.flavor.flavor == 'vtraag' and $method.flavor.resolution 75 #if $method.flavor.flavor == 'vtraag' and $method.flavor.resolution
89 resolution=$method.flavor.resolution, 76 resolution=$method.flavor.resolution,
90 #end if 77 #end if
91 random_state=$method.random_state, 78 random_state=$method.random_state,
92 key_added='$method.key_added', 79 key_added='$method.key_added',
80 directed=$method.directed,
81 use_weights=$method.use_weights,
93 copy=False) 82 copy=False)
94 #elif $method.method == 'pp.pca' 83
84 #else if $method.method == 'tl.leiden'
85 sc.tl.leiden(
86 adata=adata,
87 resolution=$method.resolution,
88 random_state=$method.random_state,
89 key_added='$method.key_added',
90 use_weights=$method.use_weights,
91 n_iterations=$method.n_iterations,
92 copy=False)
93
94 #else if $method.method == 'pp.pca'
95 sc.pp.pca(@CMD_pca_params@) 95 sc.pp.pca(@CMD_pca_params@)
96 @CMD_pca_outputs@ 96
97 #elif $method.method == 'tl.pca' 97 #else if $method.method == 'tl.pca'
98 sc.tl.pca(@CMD_pca_params@) 98 sc.tl.pca(@CMD_pca_params@)
99 @CMD_pca_outputs@ 99
100 #elif $method.method == 'tl.diffmap' 100 #else if $method.method == 'tl.diffmap'
101 sc.tl.diffmap( 101 sc.tl.diffmap(
102 adata=adata, 102 adata=adata,
103 n_comps=$method.n_comps, 103 n_comps=$method.n_comps,
104 copy =False) 104 copy =False)
105 np.savetxt('$X_diffmap', adata.obsm['X_diffmap'], delimiter='\t') 105
106 #elif $method.method == 'tl.tsne' 106 #else if $method.method == 'tl.tsne'
107 sc.tl.tsne( 107 sc.tl.tsne(
108 adata=adata, 108 adata=adata,
109 #if $method.n_pcs 109 #if $method.n_pcs
110 n_pcs=$method.n_pcs, 110 n_pcs=$method.n_pcs,
111 #end if 111 #end if
112 perplexity=$method.perplexity, 112 perplexity=$method.perplexity,
113 early_exaggeration=$method.early_exaggeration, 113 early_exaggeration=$method.early_exaggeration,
114 learning_rate=$method.learning_rate, 114 learning_rate=$method.learning_rate,
115 random_state=$method.random_state, 115 random_state=$method.random_state,
116 use_fast_tsne=$method.use_fast_tsne,
116 copy=False) 117 copy=False)
117 np.savetxt('$X_tsne', adata.obsm['X_tsne'], delimiter='\t') 118
118 #elif $method.method == 'tl.umap' 119 #else if $method.method == 'tl.umap'
119 sc.tl.umap( 120 sc.tl.umap(
120 adata=adata, 121 adata=adata,
121 min_dist=$method.min_dist, 122 min_dist=$method.min_dist,
122 spread=$method.spread, 123 spread=$method.spread,
123 n_components=$method.n_components, 124 n_components=$method.n_components,
128 gamma=$method.gamma, 129 gamma=$method.gamma,
129 negative_sample_rate=$method.negative_sample_rate, 130 negative_sample_rate=$method.negative_sample_rate,
130 init_pos='$method.init_pos', 131 init_pos='$method.init_pos',
131 random_state=$method.random_state, 132 random_state=$method.random_state,
132 copy=False) 133 copy=False)
133 np.savetxt('$X_umap', adata.obsm['X_umap'], delimiter='\t') 134
134 #elif $method.method == 'pp.neighbors' 135 #else if $method.method == 'tl.draw_graph'
135 sc.pp.neighbors( 136
137 #if str($method.adjacency) != 'None'
138 from scipy import io
139 adjacency = io.mmread('$method.adjacency')
140 #end if
141
142 sc.tl.draw_graph(
136 adata=adata, 143 adata=adata,
137 n_neighbors=$method.n_neighbors, 144 layout='$method.layout',
138 #if $method.n_pcs 145 #if str($method.root) != ''
139 n_pcs=$method.n_pcs, 146 #set $root=([int(x.strip()) for x in str($method.root).split(',')])
147 root=$root,
148 #end if
149 random_state=$method.random_state,
150 #if str($method.init_pos) != ''
151 init_pos='$method.init_pos',
140 #end if 152 #end if
141 knn=$method.knn, 153 #if str($method.adjacency) != 'None'
142 random_state=$method.random_state, 154 adjacency=adjacency,
143 method='$method.pp_neighbors_method', 155 #end if
144 metric='$method.metric', 156 #if str($method.key_ext) != ''
157 key_ext='$method.key_ext',
158 #end if
145 copy=False) 159 copy=False)
146 #elif $method.method == 'tl.rank_genes_groups' 160
147 sc.tl.rank_genes_groups( 161 #else if $method.method == "tl.paga"
162 sc.tl.paga(
148 adata=adata, 163 adata=adata,
149 groupby='$method.groupby',
150 use_raw=$method.use_raw,
151 #if str($method.groups) != ''
152 groups='$method.groups', 164 groups='$method.groups',
153 #end if 165 use_rna_velocity=$method.use_rna_velocity,
154 #if $method.ref.rest == 'rest' 166 model='$method.model',
155 reference='$method.ref.rest', 167 copy=False)
156 #else 168
157 reference='$method.ref.reference', 169 #else if $method.method == "tl.dpt"
158 #end if 170 sc.tl.dpt(
159 n_genes=$method.n_genes, 171 adata=adata,
160 method='$method.tl_rank_genes_groups_method.method', 172 n_dcs=$method.n_dcs,
161 #if $method.tl_rank_genes_groups_method.method == 'logreg' 173 n_branchings=$method.n_branchings,
162 solver='$method.tl_rank_genes_groups_method.solver.solver', 174 min_group_size=$method.min_group_size,
163 #if $method.tl_rank_genes_groups_method.solver.solver == 'newton-cg' 175 allow_kendall_tau_shift=$method.allow_kendall_tau_shift,
164 penalty='l2', 176 copy=False)
165 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
166 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
167 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
168 #else if $method.tl_rank_genes_groups_method.solver.solver == 'lbfgs'
169 penalty='l2',
170 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
171 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
172 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
173 #else if $method.tl_rank_genes_groups_method.solver.solver == 'liblinear'
174 #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
175 penalty='l1',
176 #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
177 penalty='l2',
178 dual=$method.tl_rank_genes_groups_method.solver.penalty.dual,
179 #else
180 penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
181 #end if
182 fit_intercept=$method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept,
183 #if $method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept == 'True'
184 intercept_scaling=$method.tl_rank_genes_groups_method.solver.intercept_scaling.intercept_scaling,
185 #end if
186 #if $method.tl_rank_genes_groups_method.solver.random_state
187 random_state=$method.tl_rank_genes_groups_method.solver.random_state,
188 #end if
189 #else if $method.tl_rank_genes_groups_method.solver.solver == 'sag'
190 penalty='l2',
191 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
192 #if $method.tl_rank_genes_groups_method.solver.random_state
193 random_state=$method.tl_rank_genes_groups_method.solver.random_state,
194 #end if
195 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
196 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
197 #else if $method.tl_rank_genes_groups_method.solver.solver == 'saga'
198 #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
199 penalty='l1',
200 #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
201 penalty='l2',
202 #else
203 penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
204 #end if
205 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
206 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
207 #end if
208 tol=$method.tl_rank_genes_groups_method.tol,
209 C=$method.tl_rank_genes_groups_method.c,
210 #end if
211 only_positive=$method.only_positive)
212 pd.options.display.precision = 15
213 pd.DataFrame(adata.uns['rank_genes_groups']['names']).to_csv("$names", sep="\t", index = False)
214 pd.DataFrame(adata.uns['rank_genes_groups']['scores']).to_csv("$scores", sep="\t", index = False)
215 #end if 177 #end if
216 178
217 @CMD_anndata_write_outputs@ 179 @CMD_anndata_write_outputs@
218 ]]></configfile> 180 ]]></configfile>
219 </configfiles> 181 </configfiles>
220 <inputs> 182 <inputs>
221 <expand macro="inputs_anndata"/> 183 <expand macro="inputs_anndata"/>
222 <conditional name="method"> 184 <conditional name="method">
223 <param argument="method" type="select" label="Method used for plotting"> 185 <param argument="method" type="select" label="Method used for plotting">
224 <!--<option value="tl.leiden">, using `tl.leiden`</option>!-->
225 <option value="tl.louvain">Cluster cells into subgroups, using `tl.louvain`</option> 186 <option value="tl.louvain">Cluster cells into subgroups, using `tl.louvain`</option>
187 <option value="tl.leiden">Cluster cells into subgroups, using `tl.leiden`</option>
226 <option value="pp.pca">Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `pp.pca`</option> 188 <option value="pp.pca">Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `pp.pca`</option>
227 <option value="tl.pca">Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `tl.pca`</option> 189 <option value="tl.pca">Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `tl.pca`</option>
228 <option value="tl.diffmap">Diffusion Maps, using `tl.diffmap`</option> 190 <option value="tl.diffmap">Diffusion Maps, using `tl.diffmap`</option>
229 <option value="tl.tsne">t-distributed stochastic neighborhood embedding (tSNE), using `tl.tsne`</option> 191 <option value="tl.tsne">t-distributed stochastic neighborhood embedding (tSNE), using `tl.tsne`</option>
230 <option value="tl.umap">Embed the neighborhood graph using UMAP, using `tl.umap`</option> 192 <option value="tl.umap">Embed the neighborhood graph using UMAP, using `tl.umap`</option>
231 <!--<option value="tl.phate">, using `tl.phate`</option>!--> 193 <option value="tl.draw_graph">Force-directed graph drawing, using `tl.draw_graph`</option>
232 <option value="pp.neighbors">Compute a neighborhood graph of observations, using `pp.neighbors`</option> 194 <option value="tl.dpt">Infer progression of cells through geodesic distance along the graph, using `tl.dpt`</option>
233 <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using `tl.rank_genes_groups`</option> 195 <option value="tl.paga">Generate cellular maps of differentiation manifolds with complex topologies, using `tl.paga`</option>
234 </param> 196 </param>
235 <when value="tl.louvain"> 197 <when value="tl.louvain">
236 <conditional name="flavor"> 198 <conditional name="flavor">
237 <param argument="flavor" type="select" label="Flavor for the clustering" help=""> 199 <param argument="flavor" type="select" label="Flavor for the clustering" help="">
238 <option value="vtraag">vtraag (much more powerful)</option> 200 <option value="vtraag">vtraag (much more powerful)</option>
243 label="Resolution" 205 label="Resolution"
244 help="Higher resolution means finding more and smaller clusters, which defaults to 1.0. See “Time as a resolution parameter” in Lambiotte et al, 2009"/> 206 help="Higher resolution means finding more and smaller clusters, which defaults to 1.0. See “Time as a resolution parameter” in Lambiotte et al, 2009"/>
245 </when> 207 </when>
246 <when value="igraph"/> 208 <when value="igraph"/>
247 </conditional> 209 </conditional>
248 <param argument="random_state" type="integer" value="0" label="Random state" help="Change the initialization of the optimization."/> 210 <expand macro="param_random_state"/>
249 <param argument="key_added" type="text" value="louvain" optional="true" label="Key under which to add the cluster labels" help=""/> 211 <param argument="key_added" type="text" value="louvain" optional="true" label="Key under which to add the cluster labels" help=""/>
212 <param argument="directed" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Interpret the adjacency matrix as directed graph?"/>
213 <expand macro="param_use_weights"/>
214 </when>
215 <when value="tl.leiden">
216 <param argument="resolution" type="float" value="1" label="Coarseness of the clustering" help="Higher values lead to more clusters"/>
217 <expand macro="param_random_state"/>
218 <param argument="key_added" type="text" value="leiden" label="Key under which to add the cluster labels" help=""/>
219 <expand macro="param_use_weights"/>
220 <param argument="n_iterations" type="integer" value="-1" label="How many iterations of the Leiden clustering algorithm to perform." help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>
250 </when> 221 </when>
251 <when value="pp.pca"> 222 <when value="pp.pca">
252 <expand macro="pca_inputs"/> 223 <expand macro="pca_inputs"/>
253 </when> 224 </when>
254 <when value="tl.pca"> 225 <when value="tl.pca">
261 <param name="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/> 232 <param name="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/>
262 <param name="perplexity" type="float" value="30" label="Perplexity" help="The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50. The choice is not extremely critical since t-SNE is quite insensitive to this parameter."/> 233 <param name="perplexity" type="float" value="30" label="Perplexity" help="The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50. The choice is not extremely critical since t-SNE is quite insensitive to this parameter."/>
263 <param name="early_exaggeration" type="float" value="12.0" label="Early exaggeration" help="Controls how tight natural clusters in the original space are in the embedded space and how much space will be between them. For larger values, the space between natural clusters will be larger in the embedded space. Again, the choice of this parameter is not very critical. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high."/> 234 <param name="early_exaggeration" type="float" value="12.0" label="Early exaggeration" help="Controls how tight natural clusters in the original space are in the embedded space and how much space will be between them. For larger values, the space between natural clusters will be larger in the embedded space. Again, the choice of this parameter is not very critical. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high."/>
264 <param name="learning_rate" type="float" value="1000" label="Learning rate" help="The learning rate can be a critical parameter. It should be between 100 and 1000. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high. If the cost function gets stuck in a bad local minimum increasing the learning rate helps sometimes."/> 235 <param name="learning_rate" type="float" value="1000" label="Learning rate" help="The learning rate can be a critical parameter. It should be between 100 and 1000. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high. If the cost function gets stuck in a bad local minimum increasing the learning rate helps sometimes."/>
265 <param name="random_state" type="integer" value="0" label="Random state" help="Change this to use different initial states for the optimization"/> 236 <param name="random_state" type="integer" value="0" label="Random state" help="Change this to use different initial states for the optimization"/>
237 <param argument="use_fast_tsne" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use the MulticoreTSNE package if possible?"/>
266 </when> 238 </when>
267 <when value="tl.umap"> 239 <when value="tl.umap">
268 <param argument="min_dist" type="float" value="0.5" label="Effective minimum distance between embedded points" help="Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points. The value should be set relative to the `spread` value, which determines the scale at which embedded points will be spread out. The default of in the `umap-learn` package is 0.1."/> 240 <param argument="min_dist" type="float" value="0.5" label="Effective minimum distance between embedded points" help="Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points. The value should be set relative to the `spread` value, which determines the scale at which embedded points will be spread out. The default of in the `umap-learn` package is 0.1."/>
269 <param argument="spread" type="float" value="1.0" label="Effective scale of embedded points" help="In combination with `min_dist` this determines how clustered/clumped the embedded points are."/> 241 <param argument="spread" type="float" value="1.0" label="Effective scale of embedded points" help="In combination with `min_dist` this determines how clustered/clumped the embedded points are."/>
270 <param argument="n_components" type="integer" min="0" value="2" label="Number of dimensions of the embedding" help=""/> 242 <param argument="n_components" type="integer" min="0" value="2" label="Number of dimensions of the embedding" help=""/>
275 <param argument="init_pos" type="select" label="How to initialize the low dimensional embedding" help="Called `init` in the original UMAP"> 247 <param argument="init_pos" type="select" label="How to initialize the low dimensional embedding" help="Called `init` in the original UMAP">
276 <option value="paga">Position from paga</option> 248 <option value="paga">Position from paga</option>
277 <option value="spectral" selected="true">Spectral embedding of the graph</option> 249 <option value="spectral" selected="true">Spectral embedding of the graph</option>
278 <option value="random">Initial embedding positions at random</option> 250 <option value="random">Initial embedding positions at random</option>
279 </param> 251 </param>
280 <param argument="random_state" type="integer" value="0" label="Random state" help="Change this to use different intial states for the optimization"/> 252 <expand macro="param_random_state"/>
281 </when> 253 </when>
282 <when value="pp.neighbors"> 254 <when value="tl.draw_graph">
283 <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If `knn` is `True`, number of nearest neighbors to be searched. If `knn` is `False`, a Gaussian kernel width is set to the distance of the `n_neighbors` neighbor."/> 255 <expand macro="param_layout"/>
284 <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/> 256 <expand macro="param_root"/>
285 <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the `n_neighbors` nearest neighbor."/> 257 <expand macro="param_random_state"/>
286 <param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/> 258 <param argument="init_pos" type="text" optional="true" value="" label="Precomputed coordinates for initialization" help="It should be a valid 2d observation (e.g. paga)"/>
287 <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help=""> 259 <param argument="adjacency" type="data" format="mtx" optional="true" label="Sparse adjacency matrix of the graph" help="If not set, it uses the unstructured annotation (uns) / neighbors / connectivities"/>
288 <option value="umap">umap (McInnes et al, 2018)</option> 260 <param argument="key_ext" type="text" optional="true" value="" label="External key" help="If not set, it appends `layout`"/>
289 <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option> 261 </when>
262 <when value="tl.dpt">
263 <param argument="n_dcs" type="integer" min="0" value="10" label="Number of diffusion components to use" help=""/>
264 <param argument="n_branchings" type="integer" min="0" value="0" label="Number of branchings to detect" help=""/>
265 <param argument="min_group_size" type="float" min="0" value="0.01" label="Min group size" help="During recursive splitting of branches ('dpt groups') for `n_branchings` &gt; 1, do not consider groups that contain less than `min_group_size` data points. If a float, `min_group_size` refers to a fraction of the total number of data points."/>
266 <param argument="allow_kendall_tau_shift" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Allow Kendall tau shift?" help="If a very small branch is detected upon splitting, shift away from maximum correlation in Kendall tau criterion of Haghverdi et al (2016) to stabilize the splitting."/>
267 </when>
268 <when value="tl.paga">
269 <param argument="groups" type="text" value="louvain" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations (`adata.obs`)."/>
270 <!-- The chosen truevalue/falsevalue string is what $method.use_rna_velocity expands to in the sc.tl.paga() call, so the two must differ for the checkbox to have any effect. --> <param argument="use_rna_velocity" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that `adata.uns` contains a directed single-cell graph with key `['velocyto_transitions']`. This feature might be subject to change in the future."/>
271 <param argument="model" type="select" label="PAGA connectivity model" help="">
272 <option value="v1.2">v1.2</option>
273 <option value="v1.0">v1.0</option>
290 </param> 274 </param>
291 <param argument="metric" type="select" label="Distance metric" help=""> 275 </when>
292 <expand macro="distance_metric_options"/> 276 </conditional>
293 </param>
294 </when>
295 <when value="tl.rank_genes_groups">
296 <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider" help=""/>
297 <expand macro="param_use_raw"/>
298 <param argument="groups" type="text" value="" label="Subset of groups to which comparison shall be restricted" help="e.g. ['g1', 'g2', 'g3']. If not passed, a ranking will be generated for all groups."/>
299 <conditional name="ref">
300 <param name="rest" type="select" label="Comparison">
301 <option value="rest">Compare each group to the union of the rest of the group</option>
302 <option value="group_id">Compare with respect to a specific group</option>
303 </param>
304 <when value="rest"/>
305 <when value="group_id">
306 <param argument="reference" type="text" value="" label="Group identifier with respect to which compare"/>
307 </when>
308 </conditional>
309 <param argument="n_genes" type="integer" min="0" value="100" label="The number of genes that appear in the returned tables" help=""/>
310 <conditional name="tl_rank_genes_groups_method">
311 <param argument="method" type="select" label="Method">
312 <option value="t-test">t-test</option>
313 <option value="wilcoxon">Wilcoxon-Rank-Sum</option>
314 <option value="t-test_overestim_var" selected="true">t-test with overestimate of variance of each group</option>
315 <option value="logreg">Logistic regression</option>
316 </param>
317 <when value="t-test"/>
318 <when value="wilcoxon"/>
319 <when value="t-test_overestim_var"/>
320 <when value="logreg">
321 <conditional name="solver">
322 <param argument="solver" type="select" label="Algorithm to use in the optimization problem" help="For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and ‘saga’ are faster for large ones. For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes. ‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas ‘liblinear’ and ‘saga’ handle L1 penalty.">
323 <option value="newton-cg">newton-cg</option>
324 <option value="lbfgs">lbfgs</option>
325 <option value="liblinear">liblinear</option>
326 <option value="sag">sag</option>
327 <option value="saga">saga</option>
328 </param>
329 <when value="newton-cg">
330 <expand macro="fit_intercept"/>
331 <expand macro="max_iter"/>
332 <expand macro="multi_class"/>
333 </when>
334 <when value="lbfgs">
335 <expand macro="fit_intercept"/>
336 <expand macro="max_iter"/>
337 <expand macro="multi_class"/>
338 </when>
339 <when value="liblinear">
340 <conditional name="penalty">
341 <expand macro="penalty"/>
342 <when value="l1"/>
343 <when value="l2">
344 <param argument="dual" type="boolean" truevalue="True" falsevalue="False" checked="false"
345 label="Dual (not primal) formulation?" help="Prefer primal when n_samples > n_features"/>
346 </when>
347 <when value="customized">
348 <expand macro="custom_penalty"/>
349 </when>
350 </conditional>
351 <conditional name="intercept_scaling">
352 <param argument="fit_intercept" type="select"
353 label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help="">
354 <option value="True">Yes</option>
355 <option value="False">No</option>
356 </param>
357 <when value="True">
358 <param argument="intercept_scaling" type="float" value="1.0"
359 label="Intercept scaling"
360 help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/>
361 </when>
362 <when value="False"/>
363 </conditional>
364 <expand macro="random_state"/>
365 </when>
366 <when value="sag">
367 <expand macro="fit_intercept"/>
368 <expand macro="random_state"/>
369 <expand macro="max_iter"/>
370 <expand macro="multi_class"/>
371 </when>
372 <when value="saga">
373 <conditional name="penalty">
374 <expand macro="penalty"/>
375 <when value="l1"/>
376 <when value="l2"/>
377 <when value="customized">
378 <expand macro="custom_penalty"/>
379 </when>
380 </conditional>
381 <expand macro="fit_intercept"/>
382 <expand macro="multi_class"/>
383 </when>
384 </conditional>
385 <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/>
386 <param argument="c" type="float" value="1.0" label="Inverse of regularization strength"
387 help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/>
388 </when>
389 </conditional>
390 <param argument="only_positive" type="boolean" truevalue="True" falsevalue="False" checked="true"
391 label="Only consider positive differences?" help=""/>
392 </when>
393 </conditional>
394 <expand macro="anndata_output_format"/>
395 </inputs> 277 </inputs>
396 <outputs> 278 <outputs>
397 <expand macro="anndata_outputs"/> 279 <expand macro="anndata_outputs"/>
398 <data name="X_pca" format="tabular" label="${tool.name} on ${on_string}: PCA representation of data">
399 <filter>method['method'] == 'pp.pca' or method['method'] == 'tl.pca'</filter>
400 </data>
401 <data name="PCs" format="tabular" label="${tool.name} on ${on_string}: Principal components containing the loadings">
402 <filter>method['method'] == 'pp.pca' or method['method'] == 'tl.pca'</filter>
403 </data>
404 <data name="variance_ratio" format="tabular" label="${tool.name} on ${on_string}: Ratio of explained variance">
405 <filter>method['method'] == 'pp.pca' or method['method'] == 'tl.pca'</filter>
406 </data>
407 <data name="variance" format="tabular" label="${tool.name} on ${on_string}: Explained variance, equivalent to the eigenvalues of the covariance matrix">
408 <filter>method['method'] == 'pp.pca' or method['method'] == 'tl.pca'</filter>
409 </data>
410 <data name="X_diffmap" format="tabular" label="${tool.name} on ${on_string}: Diffusion map representation">
411 <filter>method['method'] == 'tl.diffmap'</filter>
412 </data>
413 <data name="X_tsne" format="tabular" label="${tool.name} on ${on_string}: tSNE coordinates">
414 <filter>method['method'] == 'tl.tsne'</filter>
415 </data>
416 <data name="X_umap" format="tabular" label="${tool.name} on ${on_string}: UMAP coordinates">
417 <filter>method['method'] == 'tl.umap'</filter>
418 </data>
419 <data name="names" format="tabular" label="${tool.name} on ${on_string}: Gene names">
420 <filter>method['method'] == 'tl.rank_genes_groups'</filter>
421 </data>
422 <data name="scores" format="tabular" label="${tool.name} on ${on_string}: Scores">
423 <filter>method['method'] == 'tl.rank_genes_groups'</filter>
424 </data>
425 </outputs> 280 </outputs>
426 <tests> 281 <tests>
427 <test expect_num_outputs="1"> 282 <test>
428 <conditional name="input"> 283 <!-- test 1 -->
429 <param name="format" value="h5ad" /> 284 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
430 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
431 </conditional>
432 <conditional name="method"> 285 <conditional name="method">
433 <param name="method" value="tl.louvain"/> 286 <param name="method" value="tl.louvain"/>
434 <conditional name="flavor"> 287 <conditional name="flavor">
435 <param name="flavor" value="vtraag"/> 288 <param name="flavor" value="vtraag"/>
436 <param name="resolution" value="1.0"/> 289 <param name="resolution" value="1.0"/>
437 </conditional> 290 </conditional>
438 <param name="random_state" value="10"/> 291 <param name="random_state" value="10"/>
439 <param name="key_added" value="louvain"/> 292 <param name="key_added" value="louvain"/>
440 </conditional> 293 <param name="directed" value="true"/>
441 <param name="anndata_output_format" value="h5ad" /> 294 <param name="use_weights" value="false"/>
295 </conditional>
442 <assert_stdout> 296 <assert_stdout>
443 <has_text_matching expression="sc.tl.louvain"/> 297 <has_text_matching expression="sc.tl.louvain"/>
444 <has_text_matching expression="adata=adata"/> 298 <has_text_matching expression="adata=adata"/>
445 <has_text_matching expression="flavor = 'vtraag'"/> 299 <has_text_matching expression="flavor = 'vtraag'"/>
446 <has_text_matching expression="resolution=1.0"/> 300 <has_text_matching expression="resolution=1.0"/>
447 <has_text_matching expression="random_state=10"/> 301 <has_text_matching expression="random_state=10"/>
448 <has_text_matching expression="key_added='louvain'"/> 302 <has_text_matching expression="key_added='louvain'"/>
449 </assert_stdout> 303 <has_text_matching expression="directed=True"/>
450 <output name="anndata_out_h5ad" file="tl.louvain.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/> 304 <has_text_matching expression="use_weights=False"/>
451 </test> 305 </assert_stdout>
452 <test expect_num_outputs="5"> 306 <output name="anndata_out" file="tl.louvain.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
453 <conditional name="input"> 307 </test>
454 <param name="format" value="h5ad" /> 308 <test>
455 <param name="adata" value="krumsiek11.h5ad" /> 309 <!-- test 2 -->
456 </conditional> 310 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
311 <conditional name="method">
312 <param name="method" value="tl.leiden"/>
313 <param name="random_state" value="1"/>
314 <param name="random_state" value="10"/>
315 <param name="key_added" value="leiden"/>
316 <param name="use_weights" value="false"/>
317 <param name="n_iterations" value="-1"/>
318 </conditional>
319 <assert_stdout>
320 <has_text_matching expression="sc.tl.leiden"/>
321 <has_text_matching expression="resolution=1"/>
322 <has_text_matching expression="random_state=10"/>
323 <has_text_matching expression="key_added='leiden'"/>
324 <has_text_matching expression="use_weights=False"/>
325 <has_text_matching expression="n_iterations=-1"/>
326 </assert_stdout>
327 <output name="anndata_out" file="tl.leiden.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
328 </test>
329 <test>
330 <!-- test 2 -->
331 <param name="adata" value="krumsiek11.h5ad" />
457 <conditional name="method"> 332 <conditional name="method">
458 <param name="method" value="pp.pca"/> 333 <param name="method" value="pp.pca"/>
459 <param name="n_comps" value="50"/> 334 <param name="n_comps" value="50"/>
460 <param name="dtype" value="float32"/> 335 <param name="dtype" value="float32"/>
461 <conditional name="pca"> 336 <conditional name="pca">
462 <param name="chunked" value="False"/> 337 <param name="chunked" value="False"/>
463 <param name="zero_center" value="True"/> 338 <param name="zero_center" value="true"/>
464 <param name="svd_solver" value="auto"/> 339 <param name="svd_solver" value="auto"/>
465 <param name="random_state" value="0"/> 340 <param name="random_state" value="0"/>
466 </conditional> 341 </conditional>
467 </conditional> 342 <param name="use_highly_variable" value="false"/>
468 <param name="anndata_output_format" value="h5ad" /> 343 </conditional>
469 <assert_stdout> 344 <assert_stdout>
470 <has_text_matching expression="sc.pp.pca"/> 345 <has_text_matching expression="sc.pp.pca"/>
471 <has_text_matching expression="n_comps=50"/> 346 <has_text_matching expression="n_comps=50"/>
472 <has_text_matching expression="dtype='float32'"/> 347 <has_text_matching expression="dtype='float32'"/>
473 <has_text_matching expression="copy=False"/> 348 <has_text_matching expression="copy=False"/>
474 <has_text_matching expression="chunked=False"/> 349 <has_text_matching expression="chunked=False"/>
475 <has_text_matching expression="zero_center='True'"/> 350 <has_text_matching expression="zero_center=True"/>
476 <has_text_matching expression="svd_solver='auto'"/> 351 <has_text_matching expression="svd_solver='auto'"/>
477 <has_text_matching expression="random_state=0"/> 352 <has_text_matching expression="random_state=0"/>
478 </assert_stdout> 353 <has_text_matching expression="use_highly_variable=False"/>
479 <output name="anndata_out_h5ad" file="pp.pca.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> 354 </assert_stdout>
480 <output name="X_pca"> 355 <output name="anndata_out" file="pp.pca.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
481 <assert_contents> 356 </test>
482 <has_text_matching expression="-2.579\d{15}e-01" /> 357 <!--<test>
483 <has_text_matching expression="3.452\d{15}e-01" /> 358 < test 3 >
484 <has_text_matching expression="-6.088\d{15}e-03" /> 359 <param name="adata" value="krumsiek11.h5ad" />
485 <has_n_columns n="10" />
486 </assert_contents>
487 </output>
488 <output name="PCs">
489 <assert_contents>
490 <has_text_matching expression="-2.285\d{15}e-01" />
491 <has_text_matching expression="-3.042\d{15}e-01" />
492 <has_text_matching expression="-2.863\d{15}e-02" />
493 <has_text_matching expression="1.294\d{15}e-01" />
494 <has_n_columns n="10" />
495 </assert_contents>
496 </output>
497 <output name="variance_ratio">
498 <assert_contents>
499 <has_text_matching expression="2.148\d{15}e-01" />
500 <has_text_matching expression="7.596\d{15}e-02" />
501 <has_text_matching expression="5.033\d{15}e-03" />
502 <has_text_matching expression="2.801\d{15}e-05" />
503 <has_n_columns n="1" />
504 </assert_contents>
505 </output>
506 <output name="variance" file="pp.pca.variance.krumsiek11.tabular" />
507 </test>
508 <test expect_num_outputs="5">
509 <conditional name="input">
510 <param name="format" value="h5ad" />
511 <param name="adata" value="krumsiek11.h5ad" />
512 </conditional>
513 <conditional name="method"> 360 <conditional name="method">
514 <param name="method" value="pp.pca"/> 361 <param name="method" value="pp.pca"/>
515 <param name="n_comps" value="20"/> 362 <param name="n_comps" value="20"/>
516 <param name="dtype" value="float32"/> 363 <param name="dtype" value="float32"/>
517 <conditional name="pca"> 364 <conditional name="pca">
518 <param name="chunked" value="True"/> 365 <param name="chunked" value="True"/>
519 <param name="chunk_size" value="50"/> 366 <param name="chunk_size" value="50"/>
520 </conditional> 367 </conditional>
521 </conditional> 368 <param name="use_highly_variable" value="false"/>
522 <param name="anndata_output_format" value="h5ad" /> 369 </conditional>
523 <assert_stdout> 370 <assert_stdout>
524 <has_text_matching expression="sc.pp.pca"/> 371 <has_text_matching expression="sc.pp.pca"/>
525 <has_text_matching expression="data=adata"/> 372 <has_text_matching expression="data=adata"/>
526 <has_text_matching expression="n_comps=20"/> 373 <has_text_matching expression="n_comps=20"/>
527 <has_text_matching expression="dtype='float32'"/> 374 <has_text_matching expression="dtype='float32'"/>
528 <has_text_matching expression="copy=False"/> 375 <has_text_matching expression="copy=False"/>
529 <has_text_matching expression="chunked=True"/> 376 <has_text_matching expression="chunked=True"/>
530 <has_text_matching expression="chunk_size=50"/> 377 <has_text_matching expression="chunk_size=50"/>
531 </assert_stdout> 378 <has_text_matching expression="use_highly_variable=False"/>
532 <output name="anndata_out_h5ad" file="pp.pca.krumsiek11_chunk.h5ad" ftype="h5" compare="sim_size"/> 379 </assert_stdout>
533 <output name="X_pca"> 380 <output name="anndata_out" file="pp.pca.krumsiek11_chunk.h5ad" ftype="h5ad" compare="sim_size"/>
534 <assert_contents> 381 </test>
535 <has_text_matching expression="1.290\d{15}e-03" /> 382 -->
536 <has_text_matching expression="9.231\d{15}e-04" /> 383 <test>
537 <has_text_matching expression="-3.498\d{15}e-02" /> 384 <!-- test 3 -->
538 <has_text_matching expression="-4.921\d{15}e-03" /> 385 <param name="adata" value="krumsiek11.h5ad" />
539 <has_n_columns n="10" />
540 </assert_contents>
541 </output>
542 <output name="PCs">
543 <assert_contents>
544 <has_text_matching expression="2.35298924\d\d\d\d\d\d\d\d\d\de-0\d" />
545 <has_text_matching expression="2.4286999\d\d\d\d\d\d\d\d\d\d\de-0\d" />
546 <has_n_columns n="10" />
547 </assert_contents>
548 </output>
549 <output name="variance_ratio">
550 <assert_contents>
551 <has_text text="6.4362" />
552 <has_text text="2.7348" />
553 <has_n_columns n="1" />
554 </assert_contents>
555 </output>
556 <output name="variance">
557 <assert_contents>
558 <has_text_matching expression="7.540\d{15}e-01" />
559 <has_text_matching expression="1.173\d{15}e-03" />
560 <has_text_matching expression="3.204\d{15}e-05" />
561 <has_n_columns n="1" />
562 </assert_contents>
563 </output>
564 </test>
565 <test expect_num_outputs="5">
566 <conditional name="input">
567 <param name="format" value="h5ad" />
568 <param name="adata" value="krumsiek11.h5ad" />
569 </conditional>
570 <conditional name="method"> 386 <conditional name="method">
571 <param name="method" value="tl.pca"/> 387 <param name="method" value="tl.pca"/>
572 <param name="n_comps" value="50"/> 388 <param name="n_comps" value="50"/>
573 <param name="dtype" value="float32"/> 389 <param name="dtype" value="float32"/>
574 <conditional name="pca"> 390 <conditional name="pca">
575 <param name="chunked" value="False"/> 391 <param name="chunked" value="False"/>
576 <param name="zero_center" value="True"/> 392 <param name="zero_center" value="True"/>
577 <param name="svd_solver" value="auto"/> 393 <param name="svd_solver" value="auto"/>
578 <param name="random_state" value="0"/> 394 <param name="random_state" value="0"/>
579 </conditional> 395 </conditional>
580 </conditional> 396 <param name="use_highly_variable" value="false"/>
581 <param name="anndata_output_format" value="h5ad" /> 397 </conditional>
582 <assert_stdout> 398 <assert_stdout>
583 <has_text_matching expression="sc.tl.pca"/> 399 <has_text_matching expression="sc.tl.pca"/>
584 <has_text_matching expression="n_comps=50"/> 400 <has_text_matching expression="n_comps=50"/>
585 <has_text_matching expression="dtype='float32'"/> 401 <has_text_matching expression="dtype='float32'"/>
586 <has_text_matching expression="copy=False"/> 402 <has_text_matching expression="copy=False"/>
587 <has_text_matching expression="chunked=False"/> 403 <has_text_matching expression="chunked=False"/>
588 <has_text_matching expression="zero_center='True'"/> 404 <has_text_matching expression="zero_center=True"/>
589 <has_text_matching expression="svd_solver='auto'"/> 405 <has_text_matching expression="svd_solver='auto'"/>
590 </assert_stdout> 406 <has_text_matching expression="use_highly_variable=False"/>
591 <output name="anndata_out_h5ad" file="tl.pca.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> 407 </assert_stdout>
592 <output name="X_pca"> 408 <output name="anndata_out" file="tl.pca.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
593 <assert_contents> 409 </test>
594 <has_text_matching expression="-6.366\d{15}e-01" /> 410 <test>
595 <has_text_matching expression="5.702\d{15}e-03" /> 411 <!-- test 4 -->
596 <has_text_matching expression="1.862\d{15}e-02" /> 412 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
597 <has_text_matching expression="-6.861\d{15}e-02" />
598 <has_n_columns n="10" />
599 </assert_contents>
600 </output>
601 <output name="PCs">
602 <assert_contents>
603 <has_text_matching expression="1.341\d{15}e-01" />
604 <has_text_matching expression="-3.478\d{15}e-03" />
605 <has_text_matching expression="-4.890\d{15}e-02" />
606 <has_text_matching expression="-2.628\d{15}e-02" />
607 <has_n_columns n="10" />
608 </assert_contents>
609 </output>
610 <output name="variance_ratio">
611 <assert_contents>
612 <has_text_matching expression="6.436\d{15}e-01" />
613 <has_text_matching expression="1.316\d{15}e-04" />
614 <has_text_matching expression="2.801\d{15}e-05" />
615 <has_n_columns n="1" />
616 </assert_contents>
617 </output>
618 <output name="variance">
619 <assert_contents>
620 <has_text_matching expression="4.575\d{15}e-02" />
621 <has_text_matching expression="2.166\d{15}e-02" />
622 <has_text_matching expression="5.896\d{15}e-03" />
623 <has_n_columns n="1" />
624 </assert_contents>
625 </output>
626 </test>
627 <test expect_num_outputs="2">
628 <conditional name="input">
629 <param name="format" value="h5ad" />
630 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
631 </conditional>
632 <conditional name="method"> 413 <conditional name="method">
633 <param name="method" value="tl.diffmap"/> 414 <param name="method" value="tl.diffmap"/>
634 <param name="n_comps" value="15"/> 415 <param name="n_comps" value="15"/>
635 </conditional> 416 </conditional>
636 <param name="anndata_output_format" value="h5ad" />
637 <assert_stdout> 417 <assert_stdout>
638 <has_text_matching expression="sc.tl.diffmap"/> 418 <has_text_matching expression="sc.tl.diffmap"/>
639 <has_text_matching expression="n_comps=15"/> 419 <has_text_matching expression="n_comps=15"/>
640 </assert_stdout> 420 </assert_stdout>
641 <output name="anndata_out_h5ad" file="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/> 421 <output name="anndata_out" file="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
642 <output name="X_diffmap" file="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.X_diffmap.tabular"/> 422 </test>
643 </test> 423 <test>
644 <test expect_num_outputs="2"> 424 <!-- test 5 -->
645 <conditional name="input"> 425 <param name="adata" value="krumsiek11.h5ad" />
646 <param name="format" value="h5ad" />
647 <param name="adata" value="krumsiek11.h5ad" />
648 </conditional>
649 <conditional name="method"> 426 <conditional name="method">
650 <param name="method" value="tl.tsne"/> 427 <param name="method" value="tl.tsne"/>
651 <param name="n_pcs" value="10"/> 428 <param name="n_pcs" value="10"/>
652 <param name="perplexity" value="30"/> 429 <param name="perplexity" value="30"/>
653 <param name="early_exaggeration" value="12.0"/> 430 <param name="early_exaggeration" value="12.0"/>
654 <param name="learning_rate" value="1000"/> 431 <param name="learning_rate" value="1000"/>
655 <param name="random_state" value="0"/> 432 <param name="random_state" value="0"/>
656 </conditional> 433 <param name="use_fast_tsne" value="true"/>
657 <param name="anndata_output_format" value="h5ad" /> 434 </conditional>
658 <assert_stdout> 435 <assert_stdout>
659 <has_text_matching expression="sc.tl.tsne"/> 436 <has_text_matching expression="sc.tl.tsne"/>
660 <has_text_matching expression="n_pcs=10"/> 437 <has_text_matching expression="n_pcs=10"/>
661 <has_text_matching expression="perplexity=30.0"/> 438 <has_text_matching expression="perplexity=30.0"/>
662 <has_text_matching expression="early_exaggeration=12.0"/> 439 <has_text_matching expression="early_exaggeration=12.0"/>
663 <has_text_matching expression="learning_rate=1000.0"/> 440 <has_text_matching expression="learning_rate=1000.0"/>
664 <has_text_matching expression="random_state=0"/> 441 <has_text_matching expression="random_state=0"/>
665 </assert_stdout> 442 <has_text_matching expression="use_fast_tsne=True"/>
666 <output name="anndata_out_h5ad" file="tl.tsne.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> 443 </assert_stdout>
667 <output name="X_tsne" file="tl.tsne.krumsiek11_X_tsne.tabular"/> 444 <output name="anndata_out" file="tl.tsne.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
668 </test> 445 </test>
669 <test expect_num_outputs="2" > 446 <test>
670 <conditional name="input"> 447 <!-- test 6 -->
671 <param name="format" value="h5ad" /> 448 <param name="adata" value="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" />
672 <param name="adata" value="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" />
673 </conditional>
674 <conditional name="method"> 449 <conditional name="method">
675 <param name="method" value="tl.umap"/> 450 <param name="method" value="tl.umap"/>
676 <param name="min_dist" value="0.5"/> 451 <param name="min_dist" value="0.5"/>
677 <param name="spread" value="1.0"/> 452 <param name="spread" value="1.0"/>
678 <param name="n_components" value="2"/> 453 <param name="n_components" value="2"/>
681 <param name="gamma" value="1.0"/> 456 <param name="gamma" value="1.0"/>
682 <param name="negative_sample_rate" value="5"/> 457 <param name="negative_sample_rate" value="5"/>
683 <param name="init_pos" value="spectral"/> 458 <param name="init_pos" value="spectral"/>
684 <param name="random_state" value="0"/> 459 <param name="random_state" value="0"/>
685 </conditional> 460 </conditional>
686 <param name="anndata_output_format" value="h5ad" />
687 <assert_stdout> 461 <assert_stdout>
688 <has_text_matching expression="sc.tl.umap"/> 462 <has_text_matching expression="sc.tl.umap"/>
689 <has_text_matching expression="min_dist=0.5"/> 463 <has_text_matching expression="min_dist=0.5"/>
690 <has_text_matching expression="spread=1.0"/> 464 <has_text_matching expression="spread=1.0"/>
691 <has_text_matching expression="n_components=2"/> 465 <has_text_matching expression="n_components=2"/>
694 <has_text_matching expression="gamma=1.0"/> 468 <has_text_matching expression="gamma=1.0"/>
695 <has_text_matching expression="negative_sample_rate=5"/> 469 <has_text_matching expression="negative_sample_rate=5"/>
696 <has_text_matching expression="init_pos='spectral'"/> 470 <has_text_matching expression="init_pos='spectral'"/>
697 <has_text_matching expression="random_state=0"/> 471 <has_text_matching expression="random_state=0"/>
698 </assert_stdout> 472 </assert_stdout>
699 <output name="anndata_out_h5ad" file="tl.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"> 473 <output name="anndata_out" file="tl.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size">
700 <assert_contents> 474 <assert_contents>
701 <has_h5_keys keys="X, obs, obsm, uns, var" /> 475 <has_h5_keys keys="X, obs, obsm, uns, var" />
702 </assert_contents> 476 </assert_contents>
703 </output> 477 </output>
704 <output name="X_umap"> 478 </test>
705 <assert_contents> 479 <test>
706 <has_text text="2.31791388" /> 480 <!-- test 7 -->
707 <has_text text="-4.8602690" /> 481 <param name="adata" value="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad"/>
708 <has_text text="-1.8031970" /> 482 <conditional name="method">
709 <has_text text="2.31166780" /> 483 <param name="method" value="tl.draw_graph"/>
710 <has_n_columns n="2" /> 484 <param name="layout" value="fa"/>
711 </assert_contents>
712 </output>
713 </test>
714 <test expect_num_outputs="1">
715 <conditional name="input">
716 <param name="format" value="h5ad" />
717 <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
718 </conditional>
719 <conditional name="method">
720 <param name="method" value="pp.neighbors"/>
721 <param name="n_neighbors" value="15"/>
722 <param name="knn" value="True"/>
723 <param name="random_state" value="0"/> 485 <param name="random_state" value="0"/>
724 <param name="pp_neighbors_method" value="umap"/> 486 </conditional>
725 <param name="metric" value="euclidean"/> 487 <assert_stdout>
726 </conditional> 488 <has_text_matching expression="sc.tl.draw_graph"/>
727 <param name="anndata_output_format" value="h5ad" /> 489 <has_text_matching expression="layout='fa'"/>
728 <assert_stdout>
729 <has_text_matching expression="sc.pp.neighbors"/>
730 <has_text_matching expression="n_neighbors=15"/>
731 <has_text_matching expression="knn=True"/>
732 <has_text_matching expression="random_state=0"/> 490 <has_text_matching expression="random_state=0"/>
733 <has_text_matching expression="method='umap'"/> 491 </assert_stdout>
734 <has_text_matching expression="metric='euclidean'"/> 492 <output name="anndata_out" file="tl.draw_graph.pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
735 </assert_stdout> 493 </test>
736 <output name="anndata_out_h5ad" file="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"> 494 <test>
737 <assert_contents> 495 <!-- test 8 -->
738 <has_h5_keys keys="X, obs, obsm, uns, var" /> 496 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad"/>
739 </assert_contents> 497 <conditional name="method">
740 </output> 498 <param name="method" value="tl.paga"/>
741 </test> 499 <param name="groups" value="paul15_clusters"/>
742 <test expect_num_outputs="1"> 500 <param name="use_rna_velocity" value="False"/>
743 <conditional name="input"> 501 <param name="model" value="v1.2"/>
744 <param name="format" value="h5ad" /> 502 </conditional>
745 <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" /> 503 <assert_stdout>
746 </conditional> 504 <has_text_matching expression="sc.tl.paga"/>
747 <conditional name="method"> 505 <has_text_matching expression="groups='paul15_clusters'"/>
748 <param name="method" value="pp.neighbors"/> 506 <has_text_matching expression="use_rna_velocity=False"/>
749 <param name="n_neighbors" value="15"/> 507 <has_text_matching expression="model='v1.2'"/>
750 <param name="knn" value="True"/> 508 </assert_stdout>
751 <param name="pp_neighbors_method" value="gauss"/> 509 <output name="anndata_out" file="tl.paga.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
752 <param name="metric" value="braycurtis"/> 510 </test>
753 </conditional> 511 <test>
754 <param name="anndata_output_format" value="h5ad" /> 512 <!-- test 9 -->
755 <assert_stdout> 513 <param name="adata" value="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
756 <has_text_matching expression="sc.pp.neighbors"/> 514 <conditional name="method">
757 <has_text_matching expression="n_neighbors=15"/> 515 <param name="method" value="tl.dpt"/>
758 <has_text_matching expression="knn=True"/> 516 <param name="n_dcs" value="15"/>
759 <has_text_matching expression="random_state=0"/> 517 <param name="n_branchings" value="1"/>
760 <has_text_matching expression="method='gauss'"/> 518 <param name="min_group_size" value="0.01"/>
761 <has_text_matching expression="metric='braycurtis'"/> 519 <param name="allow_kendall_tau_shift" value="True"/>
762 </assert_stdout> 520 </conditional>
763 <output name="anndata_out_h5ad" file="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/> 521 <assert_stdout>
764 </test> 522 <has_text_matching expression="sc.tl.dpt"/>
765 <test expect_num_outputs="3"> 523 <has_text_matching expression="n_dcs=15"/>
766 <conditional name="input"> 524 <has_text_matching expression="n_branchings=1"/>
767 <param name="format" value="h5ad" /> 525 <has_text_matching expression="min_group_size=0.01"/>
768 <param name="adata" value="krumsiek11.h5ad" /> 526 <has_text_matching expression="allow_kendall_tau_shift=True"/>
769 </conditional> 527 </assert_stdout>
770 <conditional name="method"> 528 <output name="anndata_out" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
771 <param name="method" value="tl.rank_genes_groups"/>
772 <param name="groupby" value="cell_type"/>
773 <param name="use_raw" value="True"/>
774 <conditional name="ref">
775 <param name="rest" value="rest"/>
776 </conditional>
777 <param name="n_genes" value="100"/>
778 <conditional name="tl_rank_genes_groups_method">
779 <param name="method" value="t-test_overestim_var"/>
780 </conditional>
781 <param name="only_positive" value="True"/>
782 </conditional>
783 <param name="anndata_output_format" value="h5ad" />
784 <assert_stdout>
785 <has_text_matching expression="sc.tl.rank_genes_groups"/>
786 <has_text_matching expression="groupby='cell_type'"/>
787 <has_text_matching expression="use_raw=True"/>
788 <has_text_matching expression="reference='rest'"/>
789 <has_text_matching expression="n_genes=100"/>
790 <has_text_matching expression="method='t-test_overestim_var'"/>
791 <has_text_matching expression="only_positive=True"/>
792 </assert_stdout>
793 <output name="anndata_out_h5ad" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
794 <output name="names">
795 <assert_contents>
796 <has_n_columns n="5" />
797 <has_text_matching expression="Ery\tMk\tMo\tNeu\tprogenitor"/>
798 <has_text_matching expression="Gata1\tFog1\tCebpa\tFli1\tGata2"/>
799 <has_text_matching expression="EgrNab\tEgrNab\tSCL\tSCL\tGfi1"/>
800 </assert_contents>
801 </output>
802 <output name="scores">
803 <assert_contents>
804 <has_n_columns n="5" />
805 <has_text_matching expression="Ery\tMk\tMo\tNeu\tprogenitor"/>
806 <has_text_matching expression="18.86\d{4}"/>
807 <has_text_matching expression="17.85\d{4}"/>
808 <has_text_matching expression="-2.63\d{4}"/>
809 <has_text_matching expression="-2.98\d{4}"/>
810 <has_text_matching expression="-6.41\d{4}"/>
811 </assert_contents>
812 </output>
813 </test>
814 <test expect_num_outputs="3">
<!-- tl.rank_genes_groups on pbmc68k_reduced.h5ad, grouped by louvain, method logreg with the newton-cg solver.
     The has_text_matching expressions are regular expressions, so every literal '.' is escaped as '\.'
     to keep it from matching an arbitrary character. -->
815 <conditional name="input">
816 <param name="format" value="h5ad" />
817 <param name="adata" value="pbmc68k_reduced.h5ad" />
818 </conditional>
819 <conditional name="method">
820 <param name="method" value="tl.rank_genes_groups"/>
821 <param name="groupby" value="louvain"/>
822 <param name="use_raw" value="True"/>
823 <conditional name="ref">
824 <param name="rest" value="rest"/>
825 </conditional>
826 <param name="n_genes" value="100"/>
827 <conditional name="tl_rank_genes_groups_method">
828 <param name="method" value="logreg"/>
829 <conditional name="solver">
830 <param name="solver" value="newton-cg"/>
831 <param name="fit_intercept" value="True"/>
832 <param name="max_iter" value="100"/>
833 <param name="multi_class" value="auto"/>
834 </conditional>
835 <param name="tol" value="1e-4"/>
836 <param name="c" value="1.0"/>
837 </conditional>
838 <param name="only_positive" value="True"/>
839 </conditional>
840 <param name="anndata_output_format" value="h5ad" />
841 <assert_stdout>
842 <has_text_matching expression="sc\.tl\.rank_genes_groups"/>
843 <has_text_matching expression="groupby='louvain'"/>
844 <has_text_matching expression="use_raw=True"/>
845 <has_text_matching expression="reference='rest'"/>
846 <has_text_matching expression="n_genes=100"/>
847 <has_text_matching expression="method='logreg'"/>
848 <has_text_matching expression="solver='newton-cg'"/>
849 <has_text_matching expression="penalty='l2'"/>
850 <has_text_matching expression="fit_intercept=True"/>
851 <has_text_matching expression="max_iter=100"/>
852 <has_text_matching expression="multi_class='auto'"/>
853 <has_text_matching expression="tol=0\.0001"/>
854 <has_text_matching expression="C=1\.0"/>
855 <has_text_matching expression="only_positive=True"/>
856 </assert_stdout>
857 <output name="anndata_out_h5ad" ftype="h5">
858 <assert_contents>
859 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" />
860 </assert_contents>
861 </output>
862 <output name="names">
863 <assert_contents>
864 <has_n_columns n="7" />
865 <has_text_matching expression="IL32\tFCGR3A\tFCER1A\tLTB\tCPVL\tIGJ\tPRSS57"/>
866 <has_text_matching expression="KIAA0101\tFCER1G\tHLA-DMA\tHLA-DQA1\tNAAA\tMANF\tCCDC104"/>
867 <has_text_matching expression="CCNB2\t"/>
868 </assert_contents>
869 </output>
870 <output name="scores">
871 <assert_contents>
872 <has_n_columns n="7" />
873 <has_text_matching expression="0\.088\d+"/>
874 <has_text_matching expression="0\.114\d+"/>
875 <has_text_matching expression="0\.034\d+"/>
876 <has_text_matching expression="0\.035\d+"/>
877 <has_text_matching expression="0\.041\d+"/>
878 </assert_contents>
879 </output>
880 </test>
881 <test expect_num_outputs="3">
<!-- tl.rank_genes_groups on pbmc68k_reduced.h5ad, grouped by louvain, method logreg with the liblinear solver
     (l2 penalty, dual=False, intercept scaling 1.0, random_state 1).
     The has_text_matching expressions are regular expressions, so every literal '.' is escaped as '\.'
     to keep it from matching an arbitrary character. -->
882 <conditional name="input">
883 <param name="format" value="h5ad" />
884 <param name="adata" value="pbmc68k_reduced.h5ad" />
885 </conditional>
886 <conditional name="method">
887 <param name="method" value="tl.rank_genes_groups"/>
888 <param name="groupby" value="louvain"/>
889 <param name="use_raw" value="True"/>
890 <conditional name="ref">
891 <param name="rest" value="rest"/>
892 </conditional>
893 <param name="n_genes" value="100"/>
894 <conditional name="tl_rank_genes_groups_method">
895 <param name="method" value="logreg"/>
896 <conditional name="solver">
897 <param name="solver" value="liblinear"/>
898 <conditional name="penalty">
899 <param name="penalty" value="l2"/>
900 <param name="dual" value="False"/>
901 <conditional name="intercept_scaling">
902 <param name="fit_intercept" value="True"/>
903 <param name="intercept_scaling" value="1.0" />
904 </conditional>
905 <param name="random_state" value="1"/>
906 </conditional>
907 </conditional>
908 <param name="tol" value="1e-4"/>
909 <param name="c" value="1.0"/>
910 </conditional>
911 <param name="only_positive" value="True"/>
912 </conditional>
913 <param name="anndata_output_format" value="h5ad" />
914 <assert_stdout>
915 <has_text_matching expression="sc\.tl\.rank_genes_groups"/>
916 <has_text_matching expression="groupby='louvain'"/>
917 <has_text_matching expression="use_raw=True"/>
918 <has_text_matching expression="reference='rest'"/>
919 <has_text_matching expression="n_genes=100"/>
920 <has_text_matching expression="method='logreg'"/>
921 <has_text_matching expression="solver='liblinear'"/>
922 <has_text_matching expression="penalty='l2'"/>
923 <has_text_matching expression="dual=False"/>
924 <has_text_matching expression="fit_intercept=True"/>
925 <has_text_matching expression="intercept_scaling=1\.0"/>
926 <has_text_matching expression="tol=0\.0001"/>
927 <has_text_matching expression="C=1\.0"/>
928 <has_text_matching expression="only_positive=True"/>
929 </assert_stdout>
930 <output name="anndata_out_h5ad" ftype="h5">
931 <assert_contents>
932 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" />
933 </assert_contents>
934 </output>
935 <output name="names">
936 <assert_contents>
937 <has_n_columns n="7" />
938 <has_text_matching expression="AES\tLST1\tRNASE6\tPLAC8\tCST3\tSEC11C\tNASP"/>
939 <has_text_matching expression="GIMAP4\tMIS18BP1\tLILRB4\tTUBA4A\tCOMTD1\tSLC25A4\tLEPROT"/>
940 <has_text_matching expression="GGH\tLYN\tMAGOHB\tAL928768\.3\tITGB2-AS1\tCENPH\tASRGL1"/>
941 </assert_contents>
942 </output>
943 <output name="scores">
944 <assert_contents>
945 <has_n_columns n="7" />
946 <has_text_matching expression="0\.1680\d{4}\t0\.2156\d{4}\t0\.281\d{4}\t0\.2100\d{4}\t0\.2332\d{4}\t0\.1586\d{4}\t0\.12057\d{4}"/>
947 <has_text_matching expression="0\.0784\d{4}\t0\.0699\d{4}\t0\.06912\d{4}\t0\.05364\d{4}\t0\.03933\d{4}\t0\.03994\d{4}\t0\.0411\d{4}"/>
948 <has_text_matching expression="0\.06232\d{4}\t0\.05563\d{4}\t0\.0565\d{4}\t0\.04164\d{4}\t0\.02636\d{4}\t0\.03002\d{4}\t0\.032\d{4}"/>
949 </assert_contents>
950 </output>
951 </test> 529 </test>
952 </tests> 530 </tests>
953 <help><![CDATA[ 531 <help><![CDATA[
954 Cluster cells into subgroups, using `tl.louvain` 532 Cluster cells into subgroups (`tl.louvain`)
955 ================================================ 533 ===========================================
956 534
957 Cluster cells using the Louvain algorithm (Blondel et al, 2008) in the implementation 535 Cluster cells using the Louvain algorithm (Blondel et al, 2008) in the implementation
958 of Traag et al, 2017. The Louvain algorithm has been proposed for single-cell 536 of Traag et al, 2017. The Louvain algorithm has been proposed for single-cell
959 analysis by Levine et al, 2015. 537 analysis by Levine et al, 2015.
960 538
961 This requires to run `pp.neighbors`, first. 539 This requires to run `pp.neighbors`, first.
962 540
963 More details on the `scanpy documentation 541 More details on the `scanpy documentation
964 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.louvain.html#scanpy.api.tl.louvain>`_ 542 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.louvain.html>`_
543
544 Cluster cells into subgroups (`tl.leiden`)
545 ==========================================
546
547 Cluster cells using the Leiden algorithm (Traag et al, 2018), an improved version of the Louvain algorithm (Blondel et al, 2008).
548
549 The Louvain algorithm has been proposed for single-cell analysis by Levine et al, 2015.
550
551 More details on the `scanpy documentation
552 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.leiden.html>`_
965 553
966 Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `pp.pca` 554 Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `pp.pca`
967 ============================================================================================================ 555 ============================================================================================================
968 556
969 It uses the implementation of *scikit-learn*. 557 @CMD_pca_outputs@
970 558
971 More details on the `scanpy documentation 559 More details on the `scanpy documentation
972 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.pca.html#scanpy.api.pp.pca>`__ 560 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.pca.html>`__
973 561
974 Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `tl.pca` 562 Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `tl.pca`
975 ============================================================================================================ 563 ============================================================================================================
976 564
977 It uses the implementation of *scikit-learn*. 565 @CMD_pca_outputs@
978
979 Diffusion Maps
980 566
981 More details on the `scanpy documentation 567 More details on the `scanpy documentation
982 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.pca.html#scanpy.api.tl.pca>`__ 568 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.pca.html>`__
983 569
984 Diffusion Maps, using `tl.diffmap` 570 Diffusion Maps, using `tl.diffmap`
985 ================================== 571 ==================================
986 572
987 Diffusion maps (Coifman et al 2005) has been proposed for visualizing single-cell 573 Diffusion maps (Coifman et al 2005) has been proposed for visualizing single-cell
994 using a Gaussian kernel, use `method=='gauss'` in 580 using a Gaussian kernel, use `method=='gauss'` in
995 `pp.neighbors`. To use an exponential kernel, use the default 581 `pp.neighbors`. To use an exponential kernel, use the default
996 `method=='umap'`. Differences between these options shouldn't usually be 582 `method=='umap'`. Differences between these options shouldn't usually be
997 dramatic. 583 dramatic.
998 584
999 It returns `X_diffmap`, diffusion map representation of data, which is the right eigen basis of the transition matrix with eigenvectors as columns. 585 The diffusion map representation of data is added to the returned AnnData in the multi-dimensional
586 observations annotation (obsm). It is the right eigen basis of the transition matrix with eigenvectors
587 as columns. It can be accessed using the inspect tool for AnnData.
1000 588
1001 More details on the `scanpy documentation 589 More details on the `scanpy documentation
1002 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.diffmap.html#scanpy.api.tl.diffmap>`__ 590 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.diffmap.html>`__
1003 591
1004 t-distributed stochastic neighborhood embedding (tSNE), using `tl.tsne` 592 t-distributed stochastic neighborhood embedding (tSNE), using `tl.tsne`
1005 ======================================================================= 593 =======================================================================
1006 594
1007 t-distributed stochastic neighborhood embedding (tSNE) (Maaten et al, 2008) has been 595 t-distributed stochastic neighborhood embedding (tSNE) (Maaten et al, 2008) has been
1009 we use the implementation of *scikit-learn* (Pedregosa et al, 2011). 597 we use the implementation of *scikit-learn* (Pedregosa et al, 2011).
1010 598
1011 It returns `X_tsne`, tSNE coordinates of data. 599 It returns `X_tsne`, tSNE coordinates of data.
1012 600
1013 More details on the `scanpy documentation 601 More details on the `scanpy documentation
1014 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.tsne.html#scanpy.api.tl.tsne>`__ 602 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.tsne.html>`__
1015 603
1016 Embed the neighborhood graph using UMAP, using `tl.umap` 604 Embed the neighborhood graph using UMAP, using `tl.umap`
1017 ======================================================== 605 ========================================================
1018 606
1019 UMAP (Uniform Manifold Approximation and Projection) is a manifold learning 607 UMAP (Uniform Manifold Approximation and Projection) is a manifold learning
1025 distribution of distances in the high-dimensional space. We use the 613 distribution of distances in the high-dimensional space. We use the
1026 implementation of `umap-learn <https://github.com/lmcinnes/umap>`__ 614 implementation of `umap-learn <https://github.com/lmcinnes/umap>`__
1027 (McInnes et al, 2018). For a few comparisons of UMAP with tSNE, see this `preprint 615 (McInnes et al, 2018). For a few comparisons of UMAP with tSNE, see this `preprint
1028 <https://doi.org/10.1101/298430>`__. 616 <https://doi.org/10.1101/298430>`__.
1029 617
1030 It returns `X_umap`, UMAP coordinates of data. 618 The UMAP coordinates of data are added to the returned AnnData in the multi-dimensional
619 observations annotation (obsm). This data is accessible using the inspect tool for AnnData.
1031 620
1032 More details on the `scanpy documentation 621 More details on the `scanpy documentation
1033 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.umap.html#scanpy.api.tl.umap>`__ 622 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.umap.html>`__
1034 623
1035 Compute a neighborhood graph of observations, using `pp.neighbors` 624 Force-directed graph drawing, using `tl.draw_graph`
1036 ================================================================== 625 ===================================================
1037 626
1038 The neighbor search efficiency of this heavily relies on UMAP (McInnes et al, 2018), 627 Force-directed graph drawing describes a class of long-established algorithms for visualizing graphs.
1039 which also provides a method for estimating connectivities of data points - 628 It has been suggested for visualizing single-cell data by Islam et al, 2011.
1040 the connectivity of the manifold (`method=='umap'`). If `method=='diffmap'`, 629 Many other layouts as implemented in igraph are available. Similar approaches have been used by
1041 connectivities are computed according to Coifman et al (2005), in the adaption of 630 Zunder et al, 2015 or Weinreb et al, 2016.
1042 Haghverdi et al (2016). 631
632 This is an alternative to tSNE that often preserves the topology of the data better.
633 This requires to run `pp.neighbors`, first.
634
635 The default layout (ForceAtlas2) uses the package fa2.
636
637 The coordinates of graph layout are added to the returned AnnData in the multi-dimensional
638 observations annotation (obsm). This data is accessible using the inspect tool for AnnData.
1043 639
1044 More details on the `scanpy documentation 640 More details on the `scanpy documentation
1045 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.neighbors.html#scanpy.api.pp.neighbors>`__ 641 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.draw_graph.html>`__
1046 642
1047 Rank genes for characterizing groups, using `tl.rank_genes_groups` 643 Infer progression of cells through geodesic distance along the graph (`tl.dpt`)
1048 ================================================================== 644 ===============================================================================
1049 645
1050 It returns: 646 Reconstruct the progression of a biological process from snapshot
1051 647 data. `Diffusion Pseudotime` has been introduced by Haghverdi et al (2016) and
1052 - `Gene names`: Gene names ordered in column by group id and in rows according to scores 648 implemented within Scanpy (Wolf et al, 2017). Here, we use a further developed
1053 - `Scores`: Score for each gene (rows) for each group (columns), same order as for the names 649 version, which is able to deal with disconnected graphs (Wolf et al, 2017) and can
1054 650 be run in a `hierarchical` mode by setting the parameter
1055 More details on the `scanpy documentation 651 `n_branchings>1`. We recommend, however, to only use
1056 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.rank_genes_groups.html#scanpy.api.tl.rank_genes_groups>`__ 652 `tl.dpt` for computing pseudotime (`n_branchings=0`) and
1057 653 to detect branchings via `paga`. For pseudotime, you need
654 to annotate your data with a root cell.
655
656 This requires to run `pp.neighbors`, first. In order to
657 reproduce the original implementation of DPT, use `method=='gauss'` in
658 this. Using the default `method=='umap'` only leads to minor quantitative
659 differences, though.
660
661
662 If `n_branchings==0`, no field `dpt_groups` will be written.
663
664 - dpt_pseudotime : Array of dim (number of samples) that stores the pseudotime of each cell, that is, the DPT distance with respect to the root cell.
665 - dpt_groups : Array of dim (number of samples) that stores the subgroup id ('0','1', ...) for each cell. The groups typically correspond to 'progenitor cells', 'undecided cells' or 'branches' of a process.
666
667 The tool is similar to the R package `destiny` of Angerer et al (2016).
668
669 More details on the `tl.dpt scanpy documentation
670 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.dpt.html>`_
671
672
673 Generate cellular maps of differentiation manifolds with complex topologies (`tl.paga`)
674 =======================================================================================
675
676 By quantifying the connectivity of partitions (groups, clusters) of the
677 single-cell graph, partition-based graph abstraction (PAGA) generates a much
678 simpler abstracted graph (*PAGA graph*) of partitions, in which edge weights
679 represent confidence in the presence of connections. By thresholding this
680 confidence in `paga`, a much simpler representation of data
681 can be obtained.
682
683 The confidence can be interpreted as the ratio of the actual versus the
684 expected value of connections under the null model of randomly connecting
685 partitions. We do not provide a p-value as this null model does not
686 precisely capture what one would consider "connected" in real data, hence it
687 strongly overestimates the expected value. See an extensive discussion of
688 this in Wolf et al (2017).
689
690 Together with a random walk-based distance measure, this generates a partial
691 coordinatization of data useful for exploring and explaining its variation.
692
693 The returned AnnData object contains:
694
695 - Full adjacency matrix of the abstracted graph, weights correspond to confidence in the connectivities of partition (connectivities)
696 - Adjacency matrix of the tree-like subgraph that best explains the topology (connectivities_tree)
697
698 These datasets are stored in the unstructured annotation (uns) and can be accessed using the inspect tool for AnnData objects
699
700 More details on the `tl.paga scanpy documentation
701 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.paga.html>`_
1058 ]]></help> 702 ]]></help>
1059 <expand macro="citations"/> 703 <expand macro="citations"/>
1060 </tool> 704 </tool>