comparison cluster_reduce_dimension.xml @ 0:0e212e42ef88 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author iuc
date Mon, 04 Mar 2019 10:13:44 -0500
parents
children 20cfb9f3dded
comparison
equal deleted inserted replaced
-1:000000000000 0:0e212e42ef88
1 <tool id="scanpy_cluster_reduce_dimension" name="Cluster and reduce dimension with scanpy" version="@galaxy_version@">
2 <description></description>
3 <macros>
4 <import>macros.xml</import>
<!-- Macro "pca_inputs": PCA parameters, expanded by both the `pp.pca` and the `tl.pca`
     branch of the `method` conditional.
     The nested `pca` conditional switches on `chunked`:
       * "True"  exposes only `chunk_size`;
       * "False" exposes `zero_center`, whatever the `svd_solver` macro expands to
         (not defined in this section; presumably imported from macros.xml) and
         `random_state`. -->
5 <xml name="pca_inputs">
6 <param name="n_comps" type="integer" min="0" value="50" label="Number of principal components to compute" help=""/>
7 <param name="dtype" type="text" value="float32" label="Numpy data type string to which to convert the result" help=""/>
8 <conditional name="pca">
9 <param name="chunked" type="select" label="Type of PCA?">
10 <option value="True">Incremental PCA on segments (incremental PCA automatically zero centers and ignores settings of `random_seed` and `svd_solver`)</option>
11 <option value="False" selected="true">Full PCA</option>
12 </param>
13 <when value="True">
14 <param name="chunk_size" type="integer" min="0" value="" label="chunk_size" help="Number of observations to include in each chunk"/>
15 </when>
16 <when value="False">
17 <param name="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
18 label="Compute standard PCA from covariance matrix?"
19 help="If not, it omits zero-centering variables (uses *TruncatedSVD* from scikit-learn), which allows to handle sparse input efficiently."/>
20 <expand macro="svd_solver"/>
21 <param name="random_state" type="integer" value="0" label="Initial states for the optimization" help=""/>
22 </when>
23 </conditional>
24 </xml>
<!-- Token @CMD_pca_outputs@: Python lines placed after the `sc.pp.pca(...)` and
     `sc.tl.pca(...)` calls in the script configfile. Each line uses np.savetxt to write
     one array as tab-delimited text: adata.obsm['X_pca'] to $X_pca, adata.varm['PCs'] to
     $PCs, and adata.uns['pca']['variance'] / ['variance_ratio'] to $variance and
     $variance_ratio. -->
25 <token name="@CMD_pca_outputs@"><![CDATA[
26 np.savetxt('$X_pca', adata.obsm['X_pca'], delimiter='\t')
27 np.savetxt('$PCs', adata.varm['PCs'], delimiter='\t')
28 np.savetxt('$variance', adata.uns['pca']['variance'], delimiter='\t')
29 np.savetxt('$variance_ratio', adata.uns['pca']['variance_ratio'], delimiter='\t')
30 ]]></token>
<!-- Token @CMD_pca_params@: keyword arguments spliced into `sc.pp.pca(...)` and
     `sc.tl.pca(...)` in the script configfile. Values come from the `pca_inputs` macro.
     With chunked == 'True' only `chunk_size` is added; otherwise `zero_center`,
     `svd_solver` and `random_state` are added.
     `zero_center` is a boolean parameter that renders as the text True or False. It is
     compared with 'True' so that Python receives a real bool: a bare quoted 'False'
     would be a non-empty, hence truthy, string and zero-centering could never be turned
     off. The quoted value is kept on the left-hand side because the test assertions
     match the text zero_center='True' in the rendered script. -->
31 <token name="@CMD_pca_params@"><![CDATA[
32 data=adata,
33 n_comps=$method.n_comps,
34 dtype='$method.dtype',
35 copy=False,
36 chunked=$method.pca.chunked,
37 #if $method.pca.chunked == 'True'
38 chunk_size=$method.pca.chunk_size
39 #else
40 zero_center='$method.pca.zero_center' == 'True',
41 svd_solver='$method.pca.svd_solver',
42 random_state=$method.pca.random_state
43 #end if
44 ]]></token>
<!-- Macro "penalty": select parameter with the values l1, l2 and customized. It is
     expanded as the test parameter of the `penalty` conditionals in the liblinear and
     saga solver branches. -->
45 <xml name="penalty">
46 <param argument="penalty" type="select" label="Norm used in the penalization" help="">
47 <option value="l1">l1</option>
48 <option value="l2">l2</option>
49 <option value="customized">customized</option>
50 </param>
51 </xml>
<!-- Macro "custom_penalty": free-text `pen` parameter, expanded in the `customized`
     branch of the `penalty` conditionals; the script template emits its value as
     penalty='...'. -->
52 <xml name="custom_penalty">
53 <param argument="pen" type="text" value="" label="Norm used in the penalization" help=""/>
54 </xml>
<!-- Macro "fit_intercept": boolean (True/False, checked by default), expanded in the
     newton-cg, lbfgs, sag and saga solver branches. The liblinear branch does not use
     this macro; it defines its own `fit_intercept` select instead. -->
55 <xml name="fit_intercept">
56 <param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true"
57 label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""/>
58 </xml>
<!-- Macro "random_state": optional integer seed with no default value, expanded in the
     liblinear and sag solver branches. The script template only emits `random_state`
     for these solvers when a value is present. -->
59 <xml name="random_state">
60 <param argument="random_state" type="integer" value="" optional="true"
61 label="The seed of the pseudo random number generator to use when shuffling the data" help=""/>
62 </xml>
<!-- Macro "max_iter": non-negative integer (default 100), expanded in the newton-cg,
     lbfgs and sag solver branches. -->
63 <xml name="max_iter">
64 <param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge" help=""/>
65 </xml>
<!-- Macro "multi_class": select with the values ovr, multinomial and auto, expanded in
     the newton-cg, lbfgs, sag and saga solver branches. -->
66 <xml name="multi_class">
67 <param argument="multi_class" type="select" label="Multi class" help="">
68 <option value="ovr">ovr: a binary problem is fit for each label</option>
69 <option value="multinomial">multinomial: the multinomial loss fit across the entire probability distribution, even when the data is binary</option>
70 <option value="auto">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option>
71 </param>
72 </xml>
73 </macros>
74 <expand macro="requirements"/>
75 <expand macro="version_command"/>
<!-- Command line: consists solely of the @CMD@ token, which is not defined in this
     section (presumably it comes from macros.xml and runs the `script_file` configfile;
     confirm there). Job failure is detected from the exit code. -->
76 <command detect_errors="exit_code"><![CDATA[
77 @CMD@
78 ]]></command>
<!-- Script configfile: a Cheetah template rendered into the Python script for the
     selected `method`. It starts with the @CMD_imports@ and @CMD_read_inputs@ tokens and
     ends with @CMD_anndata_write_outputs@ (all defined outside this section), with one
     branch per `method` value in between.
     Optional numeric parameters are tested with str($param) != '' rather than by
     truthiness, so that an explicit 0 (for example n_pcs=0 or random_state=0) is still
     forwarded to scanpy. `n_comps` of `tl.diffmap` is declared optional and is therefore
     only emitted when it has a value.
     NOTE(review): `groups` is emitted as one quoted string although its help text shows
     a Python list; confirm the intended input format. -->
79 <configfiles>
80 <configfile name="script_file"><![CDATA[
81 @CMD_imports@
82 @CMD_read_inputs@
83
84 #if $method.method == 'tl.louvain'
85 sc.tl.louvain(
86 adata=adata,
87 flavor = '$method.flavor.flavor',
88 #if $method.flavor.flavor == 'vtraag' and str($method.flavor.resolution) != ''
89 resolution=$method.flavor.resolution,
90 #end if
91 random_state=$method.random_state,
92 key_added='$method.key_added',
93 copy=False)
94 #elif $method.method == 'pp.pca'
95 sc.pp.pca(@CMD_pca_params@)
96 @CMD_pca_outputs@
97 #elif $method.method == 'tl.pca'
98 sc.tl.pca(@CMD_pca_params@)
99 @CMD_pca_outputs@
100 #elif $method.method == 'tl.diffmap'
101 sc.tl.diffmap(
102 adata=adata,
#if str($method.n_comps) != ''
103 n_comps=$method.n_comps,
#end if
104 copy =False)
105 np.savetxt('$X_diffmap', adata.obsm['X_diffmap'], delimiter='\t')
106 #elif $method.method == 'tl.tsne'
107 sc.tl.tsne(
108 adata=adata,
109 #if str($method.n_pcs) != ''
110 n_pcs=$method.n_pcs,
111 #end if
112 perplexity=$method.perplexity,
113 early_exaggeration=$method.early_exaggeration,
114 learning_rate=$method.learning_rate,
115 random_state=$method.random_state,
116 copy=False)
117 np.savetxt('$X_tsne', adata.obsm['X_tsne'], delimiter='\t')
118 #elif $method.method == 'tl.umap'
119 sc.tl.umap(
120 adata=adata,
121 min_dist=$method.min_dist,
122 spread=$method.spread,
123 n_components=$method.n_components,
124 #if str($method.maxiter) != ''
125 maxiter=$method.maxiter,
126 #end if
127 alpha=$method.alpha,
128 gamma=$method.gamma,
129 negative_sample_rate=$method.negative_sample_rate,
130 init_pos='$method.init_pos',
131 random_state=$method.random_state,
132 copy=False)
133 np.savetxt('$X_umap', adata.obsm['X_umap'], delimiter='\t')
134 #elif $method.method == 'pp.neighbors'
135 sc.pp.neighbors(
136 adata=adata,
137 n_neighbors=$method.n_neighbors,
138 #if str($method.n_pcs) != ''
139 n_pcs=$method.n_pcs,
140 #end if
141 knn=$method.knn,
142 random_state=$method.random_state,
143 method='$method.pp_neighbors_method',
144 metric='$method.metric',
145 copy=False)
146 #elif $method.method == 'tl.rank_genes_groups'
147 sc.tl.rank_genes_groups(
148 adata=adata,
149 groupby='$method.groupby',
150 use_raw=$method.use_raw,
151 #if str($method.groups) != ''
152 groups='$method.groups',
153 #end if
154 #if $method.ref.rest == 'rest'
155 reference='$method.ref.rest',
156 #else
157 reference='$method.ref.reference',
158 #end if
159 n_genes=$method.n_genes,
160 method='$method.tl_rank_genes_groups_method.method',
161 #if $method.tl_rank_genes_groups_method.method == 'logreg'
162 solver='$method.tl_rank_genes_groups_method.solver.solver',
163 #if $method.tl_rank_genes_groups_method.solver.solver == 'newton-cg'
164 penalty='l2',
165 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
166 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
167 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
168 #else if $method.tl_rank_genes_groups_method.solver.solver == 'lbfgs'
169 penalty='l2',
170 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
171 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
172 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
173 #else if $method.tl_rank_genes_groups_method.solver.solver == 'liblinear'
174 #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
175 penalty='l1',
176 #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
177 penalty='l2',
178 dual=$method.tl_rank_genes_groups_method.solver.penalty.dual,
179 #else
180 penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
181 #end if
182 fit_intercept=$method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept,
183 #if $method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept == 'True'
184 intercept_scaling=$method.tl_rank_genes_groups_method.solver.intercept_scaling.intercept_scaling,
185 #end if
186 #if str($method.tl_rank_genes_groups_method.solver.random_state) != ''
187 random_state=$method.tl_rank_genes_groups_method.solver.random_state,
188 #end if
189 #else if $method.tl_rank_genes_groups_method.solver.solver == 'sag'
190 penalty='l2',
191 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
192 #if str($method.tl_rank_genes_groups_method.solver.random_state) != ''
193 random_state=$method.tl_rank_genes_groups_method.solver.random_state,
194 #end if
195 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
196 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
197 #else if $method.tl_rank_genes_groups_method.solver.solver == 'saga'
198 #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
199 penalty='l1',
200 #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
201 penalty='l2',
202 #else
203 penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
204 #end if
205 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
206 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
207 #end if
208 tol=$method.tl_rank_genes_groups_method.tol,
209 C=$method.tl_rank_genes_groups_method.c,
210 #end if
211 only_positive=$method.only_positive)
212 pd.options.display.precision = 15
213 pd.DataFrame(adata.uns['rank_genes_groups']['names']).to_csv("$names", sep="\t", index = False)
214 pd.DataFrame(adata.uns['rank_genes_groups']['scores']).to_csv("$scores", sep="\t", index = False)
215 #end if
216
217 @CMD_anndata_write_outputs@
218 ]]></configfile>
219 </configfiles>
220 <inputs>
221 <expand macro="inputs_anndata"/>
222 <conditional name="method">
<!-- Test parameter of the `method` conditional: selects which scanpy function the
     script runs. Each value has a matching `when` branch below and a matching branch in
     the script configfile. The label no longer mentions plotting, since none of the
     listed methods plot. The two commented-out options (tl.leiden, tl.phate) are
     disabled. -->
223 <param argument="method" type="select" label="Method used">
224 <!--<option value="tl.leiden">, using `tl.leiden`</option>!-->
225 <option value="tl.louvain">Cluster cells into subgroups, using `tl.louvain`</option>
226 <option value="pp.pca">Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `pp.pca`</option>
227 <option value="tl.pca">Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `tl.pca`</option>
228 <option value="tl.diffmap">Diffusion Maps, using `tl.diffmap`</option>
229 <option value="tl.tsne">t-distributed stochastic neighborhood embedding (tSNE), using `tl.tsne`</option>
230 <option value="tl.umap">Embed the neighborhood graph using UMAP, using `tl.umap`</option>
231 <!--<option value="tl.phate">, using `tl.phate`</option>!-->
232 <option value="pp.neighbors">Compute a neighborhood graph of observations, using `pp.neighbors`</option>
233 <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using `tl.rank_genes_groups`</option>
234 </param>
<!-- Parameters for `tl.louvain`. `resolution` is only offered for the vtraag flavor
     and is optional; `random_state` and `key_added` apply to both flavors. -->
235 <when value="tl.louvain">
236 <conditional name="flavor">
237 <param argument="flavor" type="select" label="Flavor for the clustering" help="">
238 <option value="vtraag">vtraag (much more powerful)</option>
239 <option value="igraph">igraph</option>
240 </param>
241 <when value="vtraag">
242 <param argument="resolution" type="float" value="" optional="true"
243 label="Resolution"
244 help="Higher resolution means finding more and smaller clusters, which defaults to 1.0. See “Time as a resolution parameter” in Lambiotte et al, 2009"/>
245 </when>
246 <when value="igraph"/>
247 </conditional>
248 <param argument="random_state" type="integer" value="0" label="Random state" help="Change the initialization of the optimization."/>
249 <param argument="key_added" type="text" value="louvain" optional="true" label="Key under which to add the cluster labels" help=""/>
250 </when>
<!-- `pp.pca` and `tl.pca` take identical parameters: both expand the `pca_inputs`
     macro. -->
251 <when value="pp.pca">
252 <expand macro="pca_inputs"/>
253 </when>
254 <when value="tl.pca">
255 <expand macro="pca_inputs"/>
256 </when>
<!-- Parameters for `tl.diffmap`: a single, optional `n_comps` (default 15). Because it
     is optional it may be submitted empty. -->
257 <when value="tl.diffmap">
258 <param argument="n_comps" type="integer" min="0" value="15" optional="true" label="Number of dimensions of the representation" help=""/>
259 </when>
<!-- Parameters for `tl.tsne`. `n_pcs` is optional with no default; the other
     parameters carry the defaults shown. Help-text typo ("intial") corrected. -->
260 <when value="tl.tsne">
261 <param name="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/>
262 <param name="perplexity" type="float" value="30" label="Perplexity" help="The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50. The choice is not extremely critical since t-SNE is quite insensitive to this parameter."/>
263 <param name="early_exaggeration" type="float" value="12.0" label="Early exaggeration" help="Controls how tight natural clusters in the original space are in the embedded space and how much space will be between them. For larger values, the space between natural clusters will be larger in the embedded space. Again, the choice of this parameter is not very critical. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high."/>
264 <param name="learning_rate" type="float" value="1000" label="Learning rate" help="The learning rate can be a critical parameter. It should be between 100 and 1000. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high. If the cost function gets stuck in a bad local minimum increasing the learning rate helps sometimes."/>
265 <param name="random_state" type="integer" value="0" label="Random state" help="Change this to use different initial states for the optimization"/>
266 </when>
<!-- Parameters for `tl.umap`. `maxiter` is optional with no default; `init_pos`
     defaults to spectral. Help-text typos corrected ("intial", "The default of in",
     "result on"). -->
267 <when value="tl.umap">
268 <param argument="min_dist" type="float" value="0.5" label="Effective minimum distance between embedded points" help="Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result in a more even dispersal of points. The value should be set relative to the `spread` value, which determines the scale at which embedded points will be spread out. The default in the `umap-learn` package is 0.1."/>
269 <param argument="spread" type="float" value="1.0" label="Effective scale of embedded points" help="In combination with `min_dist` this determines how clustered/clumped the embedded points are."/>
270 <param argument="n_components" type="integer" min="0" value="2" label="Number of dimensions of the embedding" help=""/>
271 <param argument="maxiter" type="integer" min="0" value="" optional="true" label="Number of iterations (epochs) of the optimization" help="Called `n_epochs` in the original UMAP."/>
272 <param argument="alpha" type="float" value="1.0" label="Initial learning rate for the embedding optimization" help=""/>
273 <param argument="gamma" type="float" value="1.0" label="Weighting applied to negative samples in low dimensional embedding optimization" help="Values higher than one will result in greater weight being given to negative samples."/>
274 <param argument="negative_sample_rate" type="integer" min="0" value="5" label="The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding" help=""/>
275 <param argument="init_pos" type="select" label="How to initialize the low dimensional embedding" help="Called `init` in the original UMAP">
276 <option value="paga">Position from paga</option>
277 <option value="spectral" selected="true">Spectral embedding of the graph</option>
278 <option value="random">Initial embedding positions at random</option>
279 </param>
280 <param argument="random_state" type="integer" value="0" label="Random state" help="Change this to use different initial states for the optimization"/>
281 </when>
<!-- Parameters for `pp.neighbors`. The connectivity method is named
     `pp_neighbors_method` (argument `method`), which keeps it distinct from the
     enclosing conditional's own `method` parameter. The options of `metric` come from
     the `distance_metric_options` macro, which is not defined in this section. -->
282 <when value="pp.neighbors">
283 <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If `knn` is `True`, number of nearest neighbors to be searched. If `knn` is `False`, a Gaussian kernel width is set to the distance of the `n_neighbors` neighbor."/>
284 <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/>
285 <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the `n_neighbors` nearest neighbor."/>
286 <param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/>
287 <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help="">
288 <option value="umap">umap (McInnes et al, 2018)</option>
289 <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option>
290 </param>
291 <param argument="metric" type="select" label="Distance metric" help="">
292 <expand macro="distance_metric_options"/>
293 </param>
294 </when>
<!-- Parameters for `tl.rank_genes_groups`.
     * `ref` conditional: "rest" compares each group with the rest; "group_id" asks for
       a `reference` group identifier.
     * `tl_rank_genes_groups_method` conditional: only "logreg" has extra parameters,
       namely a nested `solver` conditional (built from the penalty, fit_intercept,
       random_state, max_iter and multi_class macros) plus `tol` and `c`. The script
       template passes `c` on as the keyword `C`.
     * The `use_raw` parameter comes from the `param_use_raw` macro, which is not
       defined in this section.
     NOTE(review): the help of `groups` shows a Python list, but the script template
     wraps the value in quotes as a single string; confirm the intended format. -->
295 <when value="tl.rank_genes_groups">
296 <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider" help=""/>
297 <expand macro="param_use_raw"/>
298 <param argument="groups" type="text" value="" label="Subset of groups to which comparison shall be restricted" help="e.g. ['g1', 'g2', 'g3']. If not passed, a ranking will be generated for all groups."/>
299 <conditional name="ref">
300 <param name="rest" type="select" label="Comparison">
301 <option value="rest">Compare each group to the union of the rest of the group</option>
302 <option value="group_id">Compare with respect to a specific group</option>
303 </param>
304 <when value="rest"/>
305 <when value="group_id">
306 <param argument="reference" type="text" value="" label="Group identifier with respect to which compare"/>
307 </when>
308 </conditional>
309 <param argument="n_genes" type="integer" min="0" value="100" label="The number of genes that appear in the returned tables" help=""/>
310 <conditional name="tl_rank_genes_groups_method">
311 <param argument="method" type="select" label="Method">
312 <option value="t-test">t-test</option>
313 <option value="wilcoxon">Wilcoxon-Rank-Sum</option>
314 <option value="t-test_overestim_var" selected="true">t-test with overestimate of variance of each group</option>
315 <option value="logreg">Logistic regression</option>
316 </param>
317 <when value="t-test"/>
318 <when value="wilcoxon"/>
319 <when value="t-test_overestim_var"/>
320 <when value="logreg">
321 <conditional name="solver">
322 <param argument="solver" type="select" label="Algorithm to use in the optimization problem" help="For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and ‘saga’ are faster for large ones. For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes. ‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas ‘liblinear’ and ‘saga’ handle L1 penalty.">
323 <option value="newton-cg">newton-cg</option>
324 <option value="lbfgs">lbfgs</option>
325 <option value="liblinear">liblinear</option>
326 <option value="sag">sag</option>
327 <option value="saga">saga</option>
328 </param>
329 <when value="newton-cg">
330 <expand macro="fit_intercept"/>
331 <expand macro="max_iter"/>
332 <expand macro="multi_class"/>
333 </when>
334 <when value="lbfgs">
335 <expand macro="fit_intercept"/>
336 <expand macro="max_iter"/>
337 <expand macro="multi_class"/>
338 </when>
339 <when value="liblinear">
340 <conditional name="penalty">
341 <expand macro="penalty"/>
342 <when value="l1"/>
343 <when value="l2">
344 <param argument="dual" type="boolean" truevalue="True" falsevalue="False" checked="false"
345 label="Dual (not primal) formulation?" help="Prefer primal when n_samples > n_features"/>
346 </when>
347 <when value="customized">
348 <expand macro="custom_penalty"/>
349 </when>
350 </conditional>
351 <conditional name="intercept_scaling">
352 <param argument="fit_intercept" type="select"
353 label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help="">
354 <option value="True">Yes</option>
355 <option value="False">No</option>
356 </param>
357 <when value="True">
358 <param argument="intercept_scaling" type="float" value="1.0"
359 label="Intercept scaling"
360 help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/>
361 </when>
362 <when value="False"/>
363 </conditional>
364 <expand macro="random_state"/>
365 </when>
366 <when value="sag">
367 <expand macro="fit_intercept"/>
368 <expand macro="random_state"/>
369 <expand macro="max_iter"/>
370 <expand macro="multi_class"/>
371 </when>
372 <when value="saga">
373 <conditional name="penalty">
374 <expand macro="penalty"/>
375 <when value="l1"/>
376 <when value="l2"/>
377 <when value="customized">
378 <expand macro="custom_penalty"/>
379 </when>
380 </conditional>
381 <expand macro="fit_intercept"/>
382 <expand macro="multi_class"/>
383 </when>
384 </conditional>
385 <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/>
386 <param argument="c" type="float" value="1.0" label="Inverse of regularization strength"
387 help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/>
388 </when>
389 </conditional>
390 <param argument="only_positive" type="boolean" truevalue="True" falsevalue="False" checked="true"
391 label="Only consider positive differences?" help=""/>
392 </when>
393 </conditional>
394 <expand macro="anndata_output_format"/>
395 </inputs>
<!-- Output datasets. The AnnData output(s) come from the `anndata_outputs` macro
     (defined outside this section). Every tabular output below is created only when
     its filter, evaluated against the selected `method`, is true: the four PCA tables
     for pp.pca or tl.pca, and one or two tables for tl.diffmap, tl.tsne, tl.umap and
     tl.rank_genes_groups respectively. -->
396 <outputs>
397 <expand macro="anndata_outputs"/>
398 <data name="X_pca" format="tabular" label="${tool.name} on ${on_string}: PCA representation of data">
399 <filter>method['method'] in ('pp.pca', 'tl.pca')</filter>
400 </data>
401 <data name="PCs" format="tabular" label="${tool.name} on ${on_string}: Principal components containing the loadings">
402 <filter>method['method'] in ('pp.pca', 'tl.pca')</filter>
403 </data>
404 <data name="variance_ratio" format="tabular" label="${tool.name} on ${on_string}: Ratio of explained variance">
405 <filter>method['method'] in ('pp.pca', 'tl.pca')</filter>
406 </data>
407 <data name="variance" format="tabular" label="${tool.name} on ${on_string}: Explained variance, equivalent to the eigenvalues of the covariance matrix">
408 <filter>method['method'] in ('pp.pca', 'tl.pca')</filter>
409 </data>
410 <data name="X_diffmap" format="tabular" label="${tool.name} on ${on_string}: Diffusion map representation">
411 <filter>method['method'] == 'tl.diffmap'</filter>
412 </data>
413 <data name="X_tsne" format="tabular" label="${tool.name} on ${on_string}: tSNE coordinates">
414 <filter>method['method'] == 'tl.tsne'</filter>
415 </data>
416 <data name="X_umap" format="tabular" label="${tool.name} on ${on_string}: UMAP coordinates">
417 <filter>method['method'] == 'tl.umap'</filter>
418 </data>
419 <data name="names" format="tabular" label="${tool.name} on ${on_string}: Gene names">
420 <filter>method['method'] == 'tl.rank_genes_groups'</filter>
421 </data>
422 <data name="scores" format="tabular" label="${tool.name} on ${on_string}: Scores">
423 <filter>method['method'] == 'tl.rank_genes_groups'</filter>
424 </data>
425 </outputs>
426 <tests>
427 <test expect_num_outputs="1">
428 <conditional name="input">
429 <param name="format" value="h5ad" />
430 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
431 </conditional>
432 <conditional name="method">
433 <param name="method" value="tl.louvain"/>
434 <conditional name="flavor">
435 <param name="flavor" value="vtraag"/>
436 <param name="resolution" value="1.0"/>
437 </conditional>
438 <param name="random_state" value="10"/>
439 <param name="key_added" value="louvain"/>
440 </conditional>
441 <param name="anndata_output_format" value="h5ad" />
442 <assert_stdout>
443 <has_text_matching expression="sc.tl.louvain"/>
444 <has_text_matching expression="adata=adata"/>
445 <has_text_matching expression="flavor = 'vtraag'"/>
446 <has_text_matching expression="resolution=1.0"/>
447 <has_text_matching expression="random_state=10"/>
448 <has_text_matching expression="key_added='louvain'"/>
449 </assert_stdout>
450 <output name="anndata_out_h5ad" file="tl.louvain.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/>
451 </test>
452 <test expect_num_outputs="5">
453 <conditional name="input">
454 <param name="format" value="h5ad" />
455 <param name="adata" value="krumsiek11.h5ad" />
456 </conditional>
457 <conditional name="method">
458 <param name="method" value="pp.pca"/>
459 <param name="n_comps" value="50"/>
460 <param name="dtype" value="float32"/>
461 <conditional name="pca">
462 <param name="chunked" value="False"/>
463 <param name="zero_center" value="True"/>
464 <param name="svd_solver" value="auto"/>
465 <param name="random_state" value="0"/>
466 </conditional>
467 </conditional>
468 <param name="anndata_output_format" value="h5ad" />
469 <assert_stdout>
470 <has_text_matching expression="sc.pp.pca"/>
471 <has_text_matching expression="n_comps=50"/>
472 <has_text_matching expression="dtype='float32'"/>
473 <has_text_matching expression="copy=False"/>
474 <has_text_matching expression="chunked=False"/>
475 <has_text_matching expression="zero_center='True'"/>
476 <has_text_matching expression="svd_solver='auto'"/>
477 <has_text_matching expression="random_state=0"/>
478 </assert_stdout>
479 <output name="anndata_out_h5ad" file="pp.pca.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
480 <output name="X_pca">
481 <assert_contents>
482 <has_text_matching expression="-2.579\d{15}e-01" />
483 <has_text_matching expression="3.452\d{15}e-01" />
484 <has_text_matching expression="-6.088\d{15}e-03" />
485 <has_n_columns n="10" />
486 </assert_contents>
487 </output>
488 <output name="PCs">
489 <assert_contents>
490 <has_text_matching expression="-2.285\d{15}e-01" />
491 <has_text_matching expression="-3.042\d{15}e-01" />
492 <has_text_matching expression="-2.863\d{15}e-02" />
493 <has_text_matching expression="1.294\d{15}e-01" />
494 <has_n_columns n="10" />
495 </assert_contents>
496 </output>
497 <output name="variance_ratio">
498 <assert_contents>
499 <has_text_matching expression="2.148\d{15}e-01" />
500 <has_text_matching expression="7.596\d{15}e-02" />
501 <has_text_matching expression="5.033\d{15}e-03" />
502 <has_text_matching expression="2.801\d{15}e-05" />
503 <has_n_columns n="1" />
504 </assert_contents>
505 </output>
506 <output name="variance" file="pp.pca.variance.krumsiek11.tabular" />
507 </test>
508 <test expect_num_outputs="5">
509 <conditional name="input">
510 <param name="format" value="h5ad" />
511 <param name="adata" value="krumsiek11.h5ad" />
512 </conditional>
513 <conditional name="method">
514 <param name="method" value="pp.pca"/>
515 <param name="n_comps" value="20"/>
516 <param name="dtype" value="float32"/>
517 <conditional name="pca">
518 <param name="chunked" value="True"/>
519 <param name="chunk_size" value="50"/>
520 </conditional>
521 </conditional>
522 <param name="anndata_output_format" value="h5ad" />
523 <assert_stdout>
524 <has_text_matching expression="sc.pp.pca"/>
525 <has_text_matching expression="data=adata"/>
526 <has_text_matching expression="n_comps=20"/>
527 <has_text_matching expression="dtype='float32'"/>
528 <has_text_matching expression="copy=False"/>
529 <has_text_matching expression="chunked=True"/>
530 <has_text_matching expression="chunk_size=50"/>
531 </assert_stdout>
532 <output name="anndata_out_h5ad" file="pp.pca.krumsiek11_chunk.h5ad" ftype="h5" compare="sim_size"/>
533 <output name="X_pca">
534 <assert_contents>
535 <has_text_matching expression="1.290\d{15}e-03" />
536 <has_text_matching expression="9.231\d{15}e-04" />
537 <has_text_matching expression="-3.498\d{15}e-02" />
538 <has_text_matching expression="-4.921\d{15}e-03" />
539 <has_n_columns n="10" />
540 </assert_contents>
541 </output>
542 <output name="PCs">
543 <assert_contents>
544 <has_text_matching expression="2.35298924\d\d\d\d\d\d\d\d\d\de-0\d" />
545 <has_text_matching expression="2.4286999\d\d\d\d\d\d\d\d\d\d\de-0\d" />
546 <has_n_columns n="10" />
547 </assert_contents>
548 </output>
549 <output name="variance_ratio">
550 <assert_contents>
551 <has_text text="6.4362" />
552 <has_text text="2.7348" />
553 <has_n_columns n="1" />
554 </assert_contents>
555 </output>
556 <output name="variance">
557 <assert_contents>
558 <has_text_matching expression="7.540\d{15}e-01" />
559 <has_text_matching expression="1.173\d{15}e-03" />
560 <has_text_matching expression="3.204\d{15}e-05" />
561 <has_n_columns n="1" />
562 </assert_contents>
563 </output>
564 </test>
565 <test expect_num_outputs="5">
566 <conditional name="input">
567 <param name="format" value="h5ad" />
568 <param name="adata" value="krumsiek11.h5ad" />
569 </conditional>
570 <conditional name="method">
571 <param name="method" value="tl.pca"/>
572 <param name="n_comps" value="50"/>
573 <param name="dtype" value="float32"/>
574 <conditional name="pca">
575 <param name="chunked" value="False"/>
576 <param name="zero_center" value="True"/>
577 <param name="svd_solver" value="auto"/>
578 <param name="random_state" value="0"/>
579 </conditional>
580 </conditional>
581 <param name="anndata_output_format" value="h5ad" />
582 <assert_stdout>
583 <has_text_matching expression="sc.tl.pca"/>
584 <has_text_matching expression="n_comps=50"/>
585 <has_text_matching expression="dtype='float32'"/>
586 <has_text_matching expression="copy=False"/>
587 <has_text_matching expression="chunked=False"/>
588 <has_text_matching expression="zero_center='True'"/>
589 <has_text_matching expression="svd_solver='auto'"/>
590 </assert_stdout>
591 <output name="anndata_out_h5ad" file="tl.pca.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
592 <output name="X_pca">
593 <assert_contents>
594 <has_text_matching expression="-6.366\d{15}e-01" />
595 <has_text_matching expression="5.702\d{15}e-03" />
596 <has_text_matching expression="1.862\d{15}e-02" />
597 <has_text_matching expression="-6.861\d{15}e-02" />
598 <has_n_columns n="10" />
599 </assert_contents>
600 </output>
601 <output name="PCs">
602 <assert_contents>
603 <has_text_matching expression="1.341\d{15}e-01" />
604 <has_text_matching expression="-3.478\d{15}e-03" />
605 <has_text_matching expression="-4.890\d{15}e-02" />
606 <has_text_matching expression="-2.628\d{15}e-02" />
607 <has_n_columns n="10" />
608 </assert_contents>
609 </output>
610 <output name="variance_ratio">
611 <assert_contents>
612 <has_text_matching expression="6.436\d{15}e-01" />
613 <has_text_matching expression="1.316\d{15}e-04" />
614 <has_text_matching expression="2.801\d{15}e-05" />
615 <has_n_columns n="1" />
616 </assert_contents>
617 </output>
618 <output name="variance">
619 <assert_contents>
620 <has_text_matching expression="4.575\d{15}e-02" />
621 <has_text_matching expression="2.166\d{15}e-02" />
622 <has_text_matching expression="5.896\d{15}e-03" />
623 <has_n_columns n="1" />
624 </assert_contents>
625 </output>
626 </test>
627 <test expect_num_outputs="2"> <!-- tl.diffmap with n_comps=15: checks the generated call and compares the AnnData and X_diffmap outputs to reference files; regex dots are escaped so they match literally -->
628 <conditional name="input">
629 <param name="format" value="h5ad" />
630 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
631 </conditional>
632 <conditional name="method">
633 <param name="method" value="tl.diffmap"/>
634 <param name="n_comps" value="15"/>
635 </conditional>
636 <param name="anndata_output_format" value="h5ad" />
637 <assert_stdout>
638 <has_text_matching expression="sc\.tl\.diffmap"/>
639 <has_text_matching expression="n_comps=15"/>
640 </assert_stdout>
641 <output name="anndata_out_h5ad" file="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/>
642 <output name="X_diffmap" file="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.X_diffmap.tabular"/>
643 </test>
644 <test expect_num_outputs="2"> <!-- tl.tsne on krumsiek11: checks the generated call and compares the AnnData and X_tsne outputs to reference files; regex dots are escaped so they match literally -->
645 <conditional name="input">
646 <param name="format" value="h5ad" />
647 <param name="adata" value="krumsiek11.h5ad" />
648 </conditional>
649 <conditional name="method">
650 <param name="method" value="tl.tsne"/>
651 <param name="n_pcs" value="10"/>
652 <param name="perplexity" value="30"/>
653 <param name="early_exaggeration" value="12.0"/>
654 <param name="learning_rate" value="1000"/>
655 <param name="random_state" value="0"/>
656 </conditional>
657 <param name="anndata_output_format" value="h5ad" />
658 <assert_stdout>
659 <has_text_matching expression="sc\.tl\.tsne"/>
660 <has_text_matching expression="n_pcs=10"/>
661 <has_text_matching expression="perplexity=30\.0"/>
662 <has_text_matching expression="early_exaggeration=12\.0"/>
663 <has_text_matching expression="learning_rate=1000\.0"/>
664 <has_text_matching expression="random_state=0"/>
665 </assert_stdout>
666 <output name="anndata_out_h5ad" file="tl.tsne.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
667 <output name="X_tsne" file="tl.tsne.krumsiek11_X_tsne.tabular"/>
668 </test>
669 <test expect_num_outputs="2" > <!-- tl.umap on the umap/euclidean neighbors fixture: checks the generated call, the h5 keys of the AnnData output and sample X_umap coordinates; regex dots are escaped so they match literally -->
670 <conditional name="input">
671 <param name="format" value="h5ad" />
672 <param name="adata" value="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" />
673 </conditional>
674 <conditional name="method">
675 <param name="method" value="tl.umap"/>
676 <param name="min_dist" value="0.5"/>
677 <param name="spread" value="1.0"/>
678 <param name="n_components" value="2"/>
679 <param name="maxiter" value="2"/>
680 <param name="alpha" value="1.0"/>
681 <param name="gamma" value="1.0"/>
682 <param name="negative_sample_rate" value="5"/>
683 <param name="init_pos" value="spectral"/>
684 <param name="random_state" value="0"/>
685 </conditional>
686 <param name="anndata_output_format" value="h5ad" />
687 <assert_stdout>
688 <has_text_matching expression="sc\.tl\.umap"/>
689 <has_text_matching expression="min_dist=0\.5"/>
690 <has_text_matching expression="spread=1\.0"/>
691 <has_text_matching expression="n_components=2"/>
692 <has_text_matching expression="maxiter=2"/>
693 <has_text_matching expression="alpha=1\.0"/>
694 <has_text_matching expression="gamma=1\.0"/>
695 <has_text_matching expression="negative_sample_rate=5"/>
696 <has_text_matching expression="init_pos='spectral'"/>
697 <has_text_matching expression="random_state=0"/>
698 </assert_stdout>
699 <output name="anndata_out_h5ad" file="tl.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size">
700 <assert_contents>
701 <has_h5_keys keys="X, obs, obsm, uns, var" />
702 </assert_contents>
703 </output>
704 <output name="X_umap">
705 <assert_contents>
706 <has_text text="2.31791388" />
707 <has_text text="-4.8602690" />
708 <has_text text="-1.8031970" />
709 <has_text text="2.31166780" />
710 <has_n_columns n="2" />
711 </assert_contents>
712 </output>
713 </test>
714 <test expect_num_outputs="1"> <!-- pp.neighbors with method=umap and metric=euclidean: checks the generated call and the h5 keys of the AnnData output; regex dots are escaped so they match literally -->
715 <conditional name="input">
716 <param name="format" value="h5ad" />
717 <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
718 </conditional>
719 <conditional name="method">
720 <param name="method" value="pp.neighbors"/>
721 <param name="n_neighbors" value="15"/>
722 <param name="knn" value="True"/>
723 <param name="random_state" value="0"/>
724 <param name="pp_neighbors_method" value="umap"/>
725 <param name="metric" value="euclidean"/>
726 </conditional>
727 <param name="anndata_output_format" value="h5ad" />
728 <assert_stdout>
729 <has_text_matching expression="sc\.pp\.neighbors"/>
730 <has_text_matching expression="n_neighbors=15"/>
731 <has_text_matching expression="knn=True"/>
732 <has_text_matching expression="random_state=0"/>
733 <has_text_matching expression="method='umap'"/>
734 <has_text_matching expression="metric='euclidean'"/>
735 </assert_stdout>
736 <output name="anndata_out_h5ad" file="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size">
737 <assert_contents>
738 <has_h5_keys keys="X, obs, obsm, uns, var" />
739 </assert_contents>
740 </output>
741 </test>
742 <test expect_num_outputs="1"> <!-- pp.neighbors with method=gauss and metric=braycurtis: checks the generated call and compares the AnnData output to a reference file by size; regex dots are escaped so they match literally -->
743 <conditional name="input">
744 <param name="format" value="h5ad" />
745 <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
746 </conditional>
747 <conditional name="method">
748 <param name="method" value="pp.neighbors"/>
749 <param name="n_neighbors" value="15"/>
750 <param name="knn" value="True"/>
751 <param name="pp_neighbors_method" value="gauss"/>
752 <param name="metric" value="braycurtis"/>
753 </conditional>
754 <param name="anndata_output_format" value="h5ad" />
755 <assert_stdout>
756 <has_text_matching expression="sc\.pp\.neighbors"/>
757 <has_text_matching expression="n_neighbors=15"/>
758 <has_text_matching expression="knn=True"/>
759 <has_text_matching expression="random_state=0"/>
760 <has_text_matching expression="method='gauss'"/>
761 <has_text_matching expression="metric='braycurtis'"/>
762 </assert_stdout>
763 <output name="anndata_out_h5ad" file="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/>
764 </test>
765 <test expect_num_outputs="3"> <!-- tl.rank_genes_groups with t-test_overestim_var on krumsiek11 grouped by cell_type: checks the generated call plus the names and scores tables; regex dots are escaped so they match literally -->
766 <conditional name="input">
767 <param name="format" value="h5ad" />
768 <param name="adata" value="krumsiek11.h5ad" />
769 </conditional>
770 <conditional name="method">
771 <param name="method" value="tl.rank_genes_groups"/>
772 <param name="groupby" value="cell_type"/>
773 <param name="use_raw" value="True"/>
774 <conditional name="ref">
775 <param name="rest" value="rest"/>
776 </conditional>
777 <param name="n_genes" value="100"/>
778 <conditional name="tl_rank_genes_groups_method">
779 <param name="method" value="t-test_overestim_var"/>
780 </conditional>
781 <param name="only_positive" value="True"/>
782 </conditional>
783 <param name="anndata_output_format" value="h5ad" />
784 <assert_stdout>
785 <has_text_matching expression="sc\.tl\.rank_genes_groups"/>
786 <has_text_matching expression="groupby='cell_type'"/>
787 <has_text_matching expression="use_raw=True"/>
788 <has_text_matching expression="reference='rest'"/>
789 <has_text_matching expression="n_genes=100"/>
790 <has_text_matching expression="method='t-test_overestim_var'"/>
791 <has_text_matching expression="only_positive=True"/>
792 </assert_stdout>
793 <output name="anndata_out_h5ad" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
794 <output name="names">
795 <assert_contents>
796 <has_n_columns n="5" />
797 <has_text_matching expression="Ery\tMk\tMo\tNeu\tprogenitor"/>
798 <has_text_matching expression="Gata1\tFog1\tCebpa\tFli1\tGata2"/>
799 <has_text_matching expression="EgrNab\tEgrNab\tSCL\tSCL\tGfi1"/>
800 </assert_contents>
801 </output>
802 <output name="scores">
803 <assert_contents>
804 <has_n_columns n="5" />
805 <has_text_matching expression="Ery\tMk\tMo\tNeu\tprogenitor"/>
806 <has_text_matching expression="18\.86\d{4}"/>
807 <has_text_matching expression="17\.85\d{4}"/>
808 <has_text_matching expression="-2\.63\d{4}"/>
809 <has_text_matching expression="-2\.98\d{4}"/>
810 <has_text_matching expression="-6\.41\d{4}"/>
811 </assert_contents>
812 </output>
813 </test>
814 <test expect_num_outputs="3"> <!-- tl.rank_genes_groups with logreg and the newton-cg solver on pbmc68k_reduced grouped by louvain: checks the generated call, the h5 keys and the names and scores tables; regex dots are escaped so they match literally -->
815 <conditional name="input">
816 <param name="format" value="h5ad" />
817 <param name="adata" value="pbmc68k_reduced.h5ad" />
818 </conditional>
819 <conditional name="method">
820 <param name="method" value="tl.rank_genes_groups"/>
821 <param name="groupby" value="louvain"/>
822 <param name="use_raw" value="True"/>
823 <conditional name="ref">
824 <param name="rest" value="rest"/>
825 </conditional>
826 <param name="n_genes" value="100"/>
827 <conditional name="tl_rank_genes_groups_method">
828 <param name="method" value="logreg"/>
829 <conditional name="solver">
830 <param name="solver" value="newton-cg"/>
831 <param name="fit_intercept" value="True"/>
832 <param name="max_iter" value="100"/>
833 <param name="multi_class" value="auto"/>
834 </conditional>
835 <param name="tol" value="1e-4"/>
836 <param name="c" value="1.0"/>
837 </conditional>
838 <param name="only_positive" value="True"/>
839 </conditional>
840 <param name="anndata_output_format" value="h5ad" />
841 <assert_stdout>
842 <has_text_matching expression="sc\.tl\.rank_genes_groups"/>
843 <has_text_matching expression="groupby='louvain'"/>
844 <has_text_matching expression="use_raw=True"/>
845 <has_text_matching expression="reference='rest'"/>
846 <has_text_matching expression="n_genes=100"/>
847 <has_text_matching expression="method='logreg'"/>
848 <has_text_matching expression="solver='newton-cg'"/>
849 <has_text_matching expression="penalty='l2'"/>
850 <has_text_matching expression="fit_intercept=True"/>
851 <has_text_matching expression="max_iter=100"/>
852 <has_text_matching expression="multi_class='auto'"/>
853 <has_text_matching expression="tol=0\.0001"/>
854 <has_text_matching expression="C=1\.0"/>
855 <has_text_matching expression="only_positive=True"/>
856 </assert_stdout>
857 <output name="anndata_out_h5ad" ftype="h5">
858 <assert_contents>
859 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" />
860 </assert_contents>
861 </output>
862 <output name="names">
863 <assert_contents>
864 <has_n_columns n="7" />
865 <has_text_matching expression="IL32\tFCGR3A\tFCER1A\tLTB\tCPVL\tIGJ\tPRSS57"/>
866 <has_text_matching expression="KIAA0101\tFCER1G\tHLA-DMA\tHLA-DQA1\tNAAA\tMANF\tCCDC104"/>
867 <has_text_matching expression="CCNB2\t"/>
868 </assert_contents>
869 </output>
870 <output name="scores">
871 <assert_contents>
872 <has_n_columns n="7" />
873 <has_text_matching expression="0\.088\d+"/>
874 <has_text_matching expression="0\.114\d+"/>
875 <has_text_matching expression="0\.034\d+"/>
876 <has_text_matching expression="0\.035\d+"/>
877 <has_text_matching expression="0\.041\d+"/>
878 </assert_contents>
879 </output>
880 </test>
881 <test expect_num_outputs="3"> <!-- tl.rank_genes_groups with logreg and the liblinear solver (l2 penalty) on pbmc68k_reduced grouped by louvain: checks the generated call, the h5 keys and the names and scores tables; regex dots are escaped so they match literally -->
882 <conditional name="input">
883 <param name="format" value="h5ad" />
884 <param name="adata" value="pbmc68k_reduced.h5ad" />
885 </conditional>
886 <conditional name="method">
887 <param name="method" value="tl.rank_genes_groups"/>
888 <param name="groupby" value="louvain"/>
889 <param name="use_raw" value="True"/>
890 <conditional name="ref">
891 <param name="rest" value="rest"/>
892 </conditional>
893 <param name="n_genes" value="100"/>
894 <conditional name="tl_rank_genes_groups_method">
895 <param name="method" value="logreg"/>
896 <conditional name="solver">
897 <param name="solver" value="liblinear"/>
898 <conditional name="penalty">
899 <param name="penalty" value="l2"/>
900 <param name="dual" value="False"/>
901 <conditional name="intercept_scaling">
902 <param name="fit_intercept" value="True"/>
903 <param name="intercept_scaling" value="1.0" />
904 </conditional>
905 <param name="random_state" value="1"/>
906 </conditional>
907 </conditional>
908 <param name="tol" value="1e-4"/>
909 <param name="c" value="1.0"/>
910 </conditional>
911 <param name="only_positive" value="True"/>
912 </conditional>
913 <param name="anndata_output_format" value="h5ad" />
914 <assert_stdout>
915 <has_text_matching expression="sc\.tl\.rank_genes_groups"/>
916 <has_text_matching expression="groupby='louvain'"/>
917 <has_text_matching expression="use_raw=True"/>
918 <has_text_matching expression="reference='rest'"/>
919 <has_text_matching expression="n_genes=100"/>
920 <has_text_matching expression="method='logreg'"/>
921 <has_text_matching expression="solver='liblinear'"/>
922 <has_text_matching expression="penalty='l2'"/>
923 <has_text_matching expression="dual=False"/>
924 <has_text_matching expression="fit_intercept=True"/>
925 <has_text_matching expression="intercept_scaling=1\.0"/>
926 <has_text_matching expression="tol=0\.0001"/>
927 <has_text_matching expression="C=1\.0"/>
928 <has_text_matching expression="only_positive=True"/>
929 </assert_stdout>
930 <output name="anndata_out_h5ad" ftype="h5">
931 <assert_contents>
932 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" />
933 </assert_contents>
934 </output>
935 <output name="names">
936 <assert_contents>
937 <has_n_columns n="7" />
938 <has_text_matching expression="AES\tLST1\tRNASE6\tPLAC8\tCST3\tSEC11C\tNASP"/>
939 <has_text_matching expression="GIMAP4\tMIS18BP1\tLILRB4\tTUBA4A\tCOMTD1\tSLC25A4\tLEPROT"/>
940 <has_text_matching expression="GGH\tLYN\tMAGOHB\tAL928768\.3\tITGB2-AS1\tCENPH\tASRGL1"/>
941 </assert_contents>
942 </output>
943 <output name="scores">
944 <assert_contents>
945 <has_n_columns n="7" />
946 <has_text_matching expression="0\.1680\d{4}\t0\.2156\d{4}\t0\.281\d{4}\t0\.2100\d{4}\t0\.2332\d{4}\t0\.1586\d{4}\t0\.12057\d{4}"/>
947 <has_text_matching expression="0\.0784\d{4}\t0\.0699\d{4}\t0\.06912\d{4}\t0\.05364\d{4}\t0\.03933\d{4}\t0\.03994\d{4}\t0\.0411\d{4}"/>
948 <has_text_matching expression="0\.06232\d{4}\t0\.05563\d{4}\t0\.0565\d{4}\t0\.04164\d{4}\t0\.02636\d{4}\t0\.03002\d{4}\t0\.032\d{4}"/>
949 </assert_contents>
950 </output>
951 </test>
952 </tests>
953 <help><![CDATA[
954 Cluster cells into subgroups, using `tl.louvain`
955 ================================================
956
957 Cluster cells using the Louvain algorithm (Blondel et al, 2008) in the implementation
958 of Traag et al, 2017. The Louvain algorithm has been proposed for single-cell
959 analysis by Levine et al, 2015.
960
961 This requires running `pp.neighbors` first.
962
963 More details on the `scanpy documentation
964 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.louvain.html#scanpy.api.tl.louvain>`__
965
966 Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `pp.pca`
967 ============================================================================================================
968
969 It uses the implementation of *scikit-learn*.
970
971 More details on the `scanpy documentation
972 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.pca.html#scanpy.api.pp.pca>`__
973
974 Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `tl.pca`
975 ============================================================================================================
976
977 It uses the implementation of *scikit-learn*.
978
979
980
981 More details on the `scanpy documentation
982 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.pca.html#scanpy.api.tl.pca>`__
983
984 Diffusion Maps, using `tl.diffmap`
985 ==================================
986
987 Diffusion maps (Coifman et al 2005) have been proposed for visualizing single-cell
988 data by Haghverdi et al (2015). The tool uses the adapted Gaussian kernel suggested
989 by Haghverdi et al (2016) in the implementation of Wolf et al (2017).
990
991 The width ("sigma") of the connectivity kernel is implicitly determined by
992 the number of neighbors used to compute the single-cell graph in
993 `pp.neighbors`. To reproduce the original implementation
994 using a Gaussian kernel, use `method=='gauss'` in
995 `pp.neighbors`. To use an exponential kernel, use the default
996 `method=='umap'`. Differences between these options shouldn't usually be
997 dramatic.
998
999 It returns `X_diffmap`, diffusion map representation of data, which is the right eigen basis of the transition matrix with eigenvectors as columns.
1000
1001 More details on the `scanpy documentation
1002 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.diffmap.html#scanpy.api.tl.diffmap>`__
1003
1004 t-distributed stochastic neighborhood embedding (tSNE), using `tl.tsne`
1005 =======================================================================
1006
1007 t-distributed stochastic neighborhood embedding (tSNE) (Maaten et al, 2008) has been
1008 proposed for visualizing single-cell data by (Amir et al, 2013). Here, by default,
1009 we use the implementation of *scikit-learn* (Pedregosa et al, 2011).
1010
1011 It returns `X_tsne`, tSNE coordinates of data.
1012
1013 More details on the `scanpy documentation
1014 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.tsne.html#scanpy.api.tl.tsne>`__
1015
1016 Embed the neighborhood graph using UMAP, using `tl.umap`
1017 ========================================================
1018
1019 UMAP (Uniform Manifold Approximation and Projection) is a manifold learning
1020 technique suitable for visualizing high-dimensional data. Besides tending to
1021 be faster than tSNE, it optimizes the embedding such that it best reflects
1022 the topology of the data, which we represent throughout Scanpy using a
1023 neighborhood graph. tSNE, by contrast, optimizes the distribution of
1024 nearest-neighbor distances in the embedding such that these best match the
1025 distribution of distances in the high-dimensional space. We use the
1026 implementation of `umap-learn <https://github.com/lmcinnes/umap>`__
1027 (McInnes et al, 2018). For a few comparisons of UMAP with tSNE, see this `preprint
1028 <https://doi.org/10.1101/298430>`__.
1029
1030 It returns `X_umap`, UMAP coordinates of data.
1031
1032 More details on the `scanpy documentation
1033 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.umap.html#scanpy.api.tl.umap>`__
1034
1035 Compute a neighborhood graph of observations, using `pp.neighbors`
1036 ==================================================================
1037
1038 The efficiency of the neighbor search heavily relies on UMAP (McInnes et al, 2018),
1039 which also provides a method for estimating connectivities of data points -
1040 the connectivity of the manifold (`method=='umap'`). If `method=='gauss'`,
1041 connectivities are computed according to Coifman et al (2005), in the adaptation of
1042 Haghverdi et al (2016).
1043
1044 More details on the `scanpy documentation
1045 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.neighbors.html#scanpy.api.pp.neighbors>`__
1046
1047 Rank genes for characterizing groups, using `tl.rank_genes_groups`
1048 ==================================================================
1049
1050 It returns:
1051
1052 - `Gene names`: Gene names ordered in column by group id and in rows according to scores
1053 - `Scores`: Score for each gene (rows) for each group (columns), same order as for the names
1054
1055 More details on the `scanpy documentation
1056 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.rank_genes_groups.html#scanpy.api.tl.rank_genes_groups>`__
1057
1058 ]]></help>
1059 <expand macro="citations"/>
1060 </tool>