Mercurial > repos > iuc > scanpy_filter
comparison filter.xml @ 0:6ea5a05a260a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author | iuc |
---|---|
date | Mon, 04 Mar 2019 10:15:02 -0500 |
parents | |
children | 6a76b60e05f5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6ea5a05a260a |
---|---|
1 <tool id="scanpy_filter" name="Filter with scanpy" version="@galaxy_version@"> | |
2 <description></description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"/> | |
7 <expand macro="version_command"/> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 @CMD@ | |
10 ]]></command> | |
11 <configfiles> | |
12 <configfile name="script_file"><![CDATA[ | |
13 @CMD_imports@ | |
14 @CMD_read_inputs@ | |
15 | |
16 #if $method.method == 'pp.filter_cells' | |
17 res = sc.pp.filter_cells( | |
18 #if $modify_anndata.modify_anndata == 'true' | |
19 adata, | |
20 #else | |
21 adata.X, | |
22 #end if | |
23 #if $method.filter.filter == 'min_counts' | |
24 min_counts=$method.filter.min_counts, | |
25 #elif $method.filter.filter == 'max_counts' | |
26 max_counts=$method.filter.max_counts, | |
27 #elif $method.filter.filter == 'min_genes' | |
28 min_genes=$method.filter.min_genes, | |
29 #elif $method.filter.filter == 'max_genes' | |
30 max_genes=$method.filter.max_genes, | |
31 #end if | |
32 copy=False) | |
33 | |
34 #if $modify_anndata.modify_anndata == 'true' | |
35 df = adata.obs | |
36 #else | |
37 df = pd.DataFrame(data=dict(cell_subset=res[0], number_per_cell=res[1])) | |
38 #end if | |
39 | |
40 #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts' | |
41 df.to_csv('$counts_per_cell', sep='\t') | |
42 #elif $method.filter.filter == 'min_genes' or $method.filter.filter == 'max_genes' | |
43 df.to_csv('$genes_per_cell', sep='\t') | |
44 #end if | |
45 | |
46 #elif $method.method == 'pp.filter_genes' | |
47 res = sc.pp.filter_genes( | |
48 #if $modify_anndata.modify_anndata == 'true' | |
49 adata, | |
50 #else | |
51 adata.X, | |
52 #end if | |
53 #if $method.filter.filter == 'min_counts' | |
54 min_counts=$method.filter.min_counts, | |
55 #elif $method.filter.filter == 'max_counts' | |
56 max_counts=$method.filter.max_counts, | |
57 #elif $method.filter.filter == 'min_cells' | |
58 min_cells=$method.filter.min_cells, | |
59 #elif $method.filter.filter == 'max_cells' | |
60 max_cells=$method.filter.max_cells, | |
61 #end if | |
62 copy=False) | |
63 | |
64 #if $modify_anndata.modify_anndata == 'true' | |
65 df = adata.var | |
66 #else | |
67 df = pd.DataFrame(data=dict(gene_subset=res[0], number_per_gene=res[1])) | |
68 #end if | |
69 | |
70 #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts' | |
71 df.to_csv('$counts_per_gene', sep='\t') | |
72 #elif $method.filter.filter == 'min_cells' or $method.filter.filter == 'max_cells' | |
73 df.to_csv('$cells_per_gene', sep='\t') | |
74 #end if | |
75 | |
76 #elif $method.method == 'pp.filter_genes_dispersion' | |
77 res = sc.pp.filter_genes_dispersion( | |
78 #if $modify_anndata.modify_anndata == 'true' | |
79 adata, | |
80 #else | |
81 adata.X, | |
82 #end if | |
83 flavor='$method.flavor.flavor', | |
84 #if $method.flavor.flavor=='seurat' | |
85 min_mean=$method.flavor.min_mean, | |
86 max_mean=$method.flavor.max_mean, | |
87 min_disp=$method.flavor.min_disp, | |
## max_disp is optional: test for an empty value rather than truthiness, so that an explicit cutoff of 0 is still passed to scanpy | |
88 #if str($method.flavor.max_disp) != '' | |
89 max_disp=$method.flavor.max_disp, | |
90 #end if | |
91 #else | |
92 n_top_genes=$method.flavor.n_top_genes, | |
93 #end if | |
94 n_bins=$method.n_bins, | |
95 log=$method.log, | |
96 copy=False) | |
97 | |
98 #if $modify_anndata.modify_anndata == 'true' | |
99 adata.var.to_csv('$per_gene', sep='\t') | |
100 #else | |
101 pd.DataFrame(res).to_csv('$per_gene', sep='\t') | |
102 #end if | |
103 | |
104 #elif $method.method == 'pp.subsample' | |
105 sc.pp.subsample( | |
106 data=adata, | |
## Exactly one of fraction / n_obs is emitted, depending on the selected subsampling type | |
107 #if $method.type.type == 'fraction' | |
108 fraction=$method.type.fraction, | |
109 #elif $method.type.type == 'n_obs' | |
110 n_obs=$method.type.n_obs, | |
111 #end if | |
112 random_state=$method.random_state, | |
113 copy=False) | |
114 | |
115 #end if | |
116 | |
117 @CMD_anndata_write_modify_outputs@ | |
118 ]]></configfile> | |
119 </configfiles> | |
120 <inputs> | |
121 <expand macro="inputs_anndata"/> | |
122 <conditional name="method"> | |
123 <param argument="method" type="select" label="Method used for filtering"> | |
124 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`</option> | |
125 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using `pp.filter_genes`</option> | |
126 <option value="pp.filter_genes_dispersion">Extract highly variable genes, using `pp.filter_genes_dispersion`</option> | |
127 <!--<option value="pp.highly_variable_genes">, using `tl.highly_variable_genes`</option>!--> | |
128 <option value="pp.subsample">Subsample to a fraction of the number of observations, using `pp.subsample`</option> | |
129 <!--<option value="queries.gene_coordinates">, using `queries.gene_coordinates`</option>!--> | |
130 <!--<option value="queries.mitochondrial_genes">, using `queries.mitochondrial_genes`</option>!--> | |
131 </param> | |
132 <when value="pp.filter_cells"> | |
133 <conditional name="filter"> | |
134 <param argument="filter" type="select" label="Filter"> | |
135 <option value="min_counts">Minimum number of counts</option> | |
136 <option value="max_counts">Maximum number of counts</option> | |
137 <option value="min_genes">Minimum number of genes expressed</option> | |
138 <option value="max_genes">Maximum number of genes expressed</option> | |
139 </param> | |
140 <when value="min_counts"> | |
141 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a cell to pass filtering" help=""/> | |
142 </when> | |
143 <when value="max_counts"> | |
144 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a cell to pass filtering" help=""/> | |
145 </when> | |
146 <when value="min_genes"> | |
147 <param argument="min_genes" type="integer" min="0" value="" label="Minimum number of genes expressed required for a cell to pass filtering" help=""/> | |
148 </when> | |
149 <when value="max_genes"> | |
150 <param argument="max_genes" type="integer" min="0" value="" label="Maximum number of genes expressed required for a cell to pass filtering" help=""/> | |
151 </when> | |
152 </conditional> | |
153 </when> | |
154 <when value="pp.filter_genes"> | |
155 <conditional name="filter"> | |
156 <param argument="filter" type="select" label="Filter"> | |
157 <option value="min_counts">Minimum number of counts</option> | |
158 <option value="max_counts">Maximum number of counts</option> | |
159 <option value="min_cells">Minimum number of cells expressed</option> | |
160 <option value="max_cells">Maximum number of cells expressed</option> | |
161 </param> | |
162 <when value="min_counts"> | |
163 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering" help=""/> | |
164 </when> | |
165 <when value="max_counts"> | |
166 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering" help=""/> | |
167 </when> | |
168 <when value="min_cells"> | |
169 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering" help=""/> | |
170 </when> | |
171 <when value="max_cells"> | |
172 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering" help=""/> | |
173 </when> | |
174 </conditional> | |
175 </when> | |
176 <when value="pp.filter_genes_dispersion"> | |
177 <conditional name='flavor'> | |
178 <param argument="flavor" type="select" label="Flavor for computing normalized dispersion" help=""> | |
179 <option value="seurat">seurat: expects non-logarithmized data</option> | |
180 <option value="cell_ranger">cell_ranger: usually called for logarithmized data</option> | |
181 </param> | |
182 <when value="seurat"> | |
183 <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff" help=""/> | |
184 <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff" help=""/> | |
185 <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff" help=""/> | |
186 <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff" help=""/> | |
187 </when> | |
188 <when value="cell_ranger"> | |
189 <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep" help=""/> | |
190 </when> | |
191 </conditional> | |
192 <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/> | |
193 <expand macro="param_log"/> | |
194 </when> | |
195 <when value="pp.subsample"> | |
196 <conditional name="type"> | |
197 <param name="type" type="select" label="Type of subsampling"> | |
198 <option value="fraction">By fraction</option> | |
199 <option value="n_obs">By number of observation</option> | |
200 </param> | |
201 <when value="fraction"> | |
202 <param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations" help=""/> | |
203 </when> | |
204 <when value="n_obs"> | |
205 <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations" help=""/> | |
206 </when> | |
207 </conditional> | |
208 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/> | |
209 </when> | |
210 </conditional> | |
211 <expand macro="anndata_modify_output_input"/> | |
212 </inputs> | |
213 <outputs> | |
214 <expand macro="anndata_modify_outputs"/> | |
215 <!-- for pp.filter_cells --> | |
216 <data name="counts_per_cell" format="tabular" label="${tool.name} on ${on_string}: Counts per cells after filtering"> | |
217 <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter> | |
218 </data> | |
219 <data name="genes_per_cell" format="tabular" label="${tool.name} on ${on_string}: Number of genes per cell after filtering"> | |
220 <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_genes' or method['filter']['filter'] == 'max_genes')</filter> | |
221 </data> | |
222 <!-- for pp.filter_genes --> | |
223 <data name="counts_per_gene" format="tabular" label="${tool.name} on ${on_string}: Counts per genes after filtering"> | |
224 <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter> | |
225 </data> | |
226 <data name="cells_per_gene" format="tabular" label="${tool.name} on ${on_string}: Number of cells per genes after filtering"> | |
227 <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_cells' or method['filter']['filter'] == 'max_cells')</filter> | |
228 </data> | |
229 <!-- for pp.filter_genes_dispersion --> | |
230 <data name="per_gene" format="tabular" label="${tool.name} on ${on_string}: Means, dispersions and normalized dispersions per gene"> | |
231 <filter>method['method'] == 'pp.filter_genes_dispersion'</filter> | |
232 </data> | |
233 </outputs> | |
234 <tests> | |
235 <test expect_num_outputs="2"> | |
236 <conditional name="input"> | |
237 <param name="format" value="h5ad" /> | |
238 <param name="adata" value="krumsiek11.h5ad" /> | |
239 </conditional> | |
240 <conditional name="method"> | |
241 <param name="method" value="pp.filter_cells"/> | |
242 <conditional name="filter"> | |
243 <param name="filter" value="min_counts"/> | |
244 <param name="min_counts" value="3"/> | |
245 </conditional> | |
246 </conditional> | |
247 <conditional name="modify_anndata"> | |
248 <param name="modify_anndata" value="true"/> | |
249 <param name="anndata_output_format" value="h5ad" /> | |
250 </conditional> | |
251 <assert_stdout> | |
252 <has_text_matching expression="sc.pp.filter_cells"/> | |
253 <has_text_matching expression="min_counts=3"/> | |
254 </assert_stdout> | |
255 <output name="anndata_out_h5ad" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> | |
256 <output name="counts_per_cell"> | |
257 <assert_contents> | |
258 <has_text_matching expression="cell_type\tn_counts" /> | |
259 <has_text_matching expression="46\tprogenitor\t3.028" /> | |
260 <has_text_matching expression="85\tEry\t3.7001" /> | |
261 <has_text_matching expression="150\tMk\t4.095" /> | |
262 <has_n_columns n="3" /> | |
263 </assert_contents> | |
264 </output> | |
265 </test> | |
266 <test expect_num_outputs="2"> | |
267 <conditional name="input"> | |
268 <param name="format" value="loom" /> | |
269 <param name="adata" value="krumsiek11.loom" /> | |
270 <param name="sparse" value="True"/> | |
271 <param name="cleanup" value="False"/> | |
272 <param name="x_name" value="spliced"/> | |
273 <param name="obs_names" value="CellID" /> | |
274 <param name="var_names" value="Gene"/> | |
275 </conditional> | |
276 <conditional name="method"> | |
277 <param name="method" value="pp.filter_cells"/> | |
278 <conditional name="filter"> | |
279 <param name="filter" value="min_counts"/> | |
280 <param name="min_counts" value="3"/> | |
281 </conditional> | |
282 </conditional> | |
283 <conditional name="modify_anndata"> | |
284 <param name="modify_anndata" value="true"/> | |
285 <param name="anndata_output_format" value="loom" /> | |
286 </conditional> | |
287 <assert_stdout> | |
288 <has_text_matching expression="sc.pp.filter_cells"/> | |
289 <has_text_matching expression="min_counts=3"/> | |
290 </assert_stdout> | |
291 <output name="anndata_out_loom" file="pp.filter_cells.krumsiek11-min_counts.loom" ftype="loom" compare="sim_size"/> | |
292 <output name="counts_per_cell"> | |
293 <assert_contents> | |
294 <has_text_matching expression="cell_type\tn_counts" /> | |
295 <has_text_matching expression="46\tprogenitor\t3.028" /> | |
296 <has_text_matching expression="85\tEry\t3.7001" /> | |
297 <has_text_matching expression="97\tMo\t3.925" /> | |
298 <has_text_matching expression="150\tMk\t4.095" /> | |
299 <has_n_columns n="3" /> | |
300 </assert_contents> | |
301 </output> | |
302 </test> | |
303 <test expect_num_outputs="1"> | |
304 <conditional name="input"> | |
305 <param name="format" value="h5ad" /> | |
306 <param name="adata" value="krumsiek11.h5ad"/> | |
307 </conditional> | |
308 <conditional name="method"> | |
309 <param name="method" value="pp.filter_cells"/> | |
310 <conditional name="filter"> | |
311 <param name="filter" value="max_genes"/> | |
312 <param name="max_genes" value="100"/> | |
313 </conditional> | |
314 </conditional> | |
315 <conditional name="modify_anndata"> | |
316 <param name="modify_anndata" value="false"/> | |
317 </conditional> | |
318 <assert_stdout> | |
319 <has_text_matching expression="sc.pp.filter_cells"/> | |
320 <has_text_matching expression="adata.X"/> | |
321 <has_text_matching expression="max_genes=100"/> | |
322 </assert_stdout> | |
323 <output name="genes_per_cell" file="pp.filter_cells.number_per_cell.krumsiek11-max_genes.tabular"/> | |
324 </test> | |
325 <test expect_num_outputs="2"> | |
326 <conditional name="input"> | |
327 <param name="format" value="h5ad" /> | |
328 <param name="adata" value="krumsiek11.h5ad" /> | |
329 </conditional> | |
330 <conditional name="method"> | |
331 <param name="method" value="pp.filter_genes"/> | |
332 <conditional name="filter"> | |
333 <param name="filter" value="min_counts"/> | |
334 <param name="min_counts" value="3"/> | |
335 </conditional> | |
336 </conditional> | |
337 <conditional name="modify_anndata"> | |
338 <param name="modify_anndata" value="true"/> | |
339 <param name="anndata_output_format" value="h5ad" /> | |
340 </conditional> | |
341 <assert_stdout> | |
342 <has_text_matching expression="sc.pp.filter_genes"/> | |
343 <has_text_matching expression="min_counts=3"/> | |
344 </assert_stdout> | |
345 <output name="anndata_out_h5ad" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> | |
346 <output name="counts_per_gene" file="pp.filter_genes.number_per_gene.krumsiek11-min_counts.tabular"/> | |
347 </test> | |
348 <test expect_num_outputs="1"> | |
349 <conditional name="input"> | |
350 <param name="format" value="h5ad" /> | |
351 <param name="adata" value="pbmc68k_reduced.h5ad"/> | |
352 </conditional> | |
353 <conditional name="method"> | |
354 <param name="method" value="pp.filter_genes"/> | |
355 <conditional name="filter"> | |
356 <param name="filter" value="max_cells"/> | |
357 <param name="max_cells" value="500"/> | |
358 </conditional> | |
359 </conditional> | |
360 <conditional name="modify_anndata"> | |
361 <param name="modify_anndata" value="false"/> | |
362 </conditional> | |
363 <assert_stdout> | |
364 <has_text_matching expression="sc.pp.filter_genes"/> | |
365 <has_text_matching expression="adata.X"/> | |
366 <has_text_matching expression="max_cells=500"/> | |
367 </assert_stdout> | |
368 <output name="cells_per_gene" file="pp.filter_genes.number_per_gene.pbmc68k_reduced-max_cells.tabular"/> | |
369 </test> | |
370 <test expect_num_outputs="2"> | |
371 <conditional name="input"> | |
372 <param name="format" value="h5ad" /> | |
373 <param name="adata" value="krumsiek11.h5ad" /> | |
374 </conditional> | |
375 <conditional name="method"> | |
376 <param name="method" value="pp.filter_genes_dispersion"/> | |
377 <conditional name="flavor"> | |
378 <param name="flavor" value="seurat"/> | |
379 <param name="min_mean" value="0.0125"/> | |
380 <param name="max_mean" value="3"/> | |
381 <param name="min_disp" value="0.5"/> | |
382 </conditional> | |
383 <param name="n_bins" value="20" /> | |
384 <param name="log" value="true"/> | |
385 </conditional> | |
386 <conditional name="modify_anndata"> | |
387 <param name="modify_anndata" value="true"/> | |
388 <param name="anndata_output_format" value="h5ad" /> | |
389 </conditional> | |
390 <assert_stdout> | |
391 <has_text_matching expression="sc.pp.filter_genes_dispersion"/> | |
392 <has_text_matching expression="flavor='seurat'"/> | |
393 <has_text_matching expression="min_mean=0.0125"/> | |
394 <has_text_matching expression="max_mean=3.0"/> | |
395 <has_text_matching expression="min_disp=0.5"/> | |
396 <has_text_matching expression="n_bins=20"/> | |
397 <has_text_matching expression="log=True"/> | |
398 </assert_stdout> | |
399 <output name="anndata_out_h5ad" file="pp.filter_genes_dispersion.krumsiek11-seurat.h5ad" ftype="h5" compare="sim_size"/> | |
400 <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-seurat.tabular"/> | |
401 </test> | |
402 <test expect_num_outputs="1"> | |
<!-- pp.filter_genes_dispersion, cell_ranger flavor, run on adata.X only (modify_anndata=false): the per-gene table is the single output --> | |
403 <conditional name="input"> | |
404 <param name="format" value="h5ad" /> | |
405 <param name="adata" value="krumsiek11.h5ad" /> | |
406 </conditional> | |
407 <conditional name="method"> | |
408 <param name="method" value="pp.filter_genes_dispersion"/> | |
409 <conditional name="flavor"> | |
410 <param name="flavor" value="cell_ranger"/> | |
411 <param name="n_top_genes" value="2"/> | |
412 </conditional> | |
413 <param name="n_bins" value="20"/> | |
414 <param name="log" value="true"/> | |
415 </conditional> | |
416 <conditional name="modify_anndata"> | |
417 <param name="modify_anndata" value="false"/> | |
418 </conditional> | |
419 <assert_stdout> | |
420 <has_text_matching expression="sc.pp.filter_genes_dispersion"/> | |
421 <has_text_matching expression="flavor='cell_ranger'"/> | |
422 <has_text_matching expression="n_top_genes=2"/> | |
423 <has_text_matching expression="n_bins=20"/> | |
424 <has_text_matching expression="log=True"/> | |
425 </assert_stdout> | |
426 <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-cell_ranger.tabular"/> | |
427 </test> | |
428 <test expect_num_outputs="1"> | |
429 <conditional name="input"> | |
430 <param name="format" value="h5ad" /> | |
431 <param name="adata" value="krumsiek11.h5ad" /> | |
432 </conditional> | |
433 <conditional name="method"> | |
434 <param name="method" value="pp.subsample"/> | |
435 <conditional name="type"> | |
436 <param name="type" value="fraction" /> | |
437 <param name="fraction" value="0.5"/> | |
438 </conditional> | |
439 <param name="random_state" value="0"/> | |
440 </conditional> | |
441 <conditional name="modify_anndata"> | |
442 <param name="modify_anndata" value="true"/> | |
443 <param name="anndata_output_format" value="h5ad" /> | |
444 </conditional> | |
445 <assert_stdout> | |
446 <has_text_matching expression="sc.pp.subsample"/> | |
447 <has_text_matching expression="fraction=0.5"/> | |
448 <has_text_matching expression="random_state=0"/> | |
449 </assert_stdout> | |
450 <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5" compare="sim_size"/> | |
451 </test> | |
452 <test expect_num_outputs="1"> | |
453 <conditional name="input"> | |
454 <param name="format" value="h5ad" /> | |
455 <param name="adata" value="krumsiek11.h5ad" /> | |
456 </conditional> | |
457 <conditional name="method"> | |
458 <param name="method" value="pp.subsample"/> | |
459 <conditional name="type"> | |
460 <param name="type" value="n_obs" /> | |
461 <param name="n_obs" value="10"/> | |
462 </conditional> | |
463 <param name="random_state" value="0"/> | |
464 </conditional> | |
465 <conditional name="modify_anndata"> | |
466 <param name="modify_anndata" value="true"/> | |
467 <param name="anndata_output_format" value="h5ad" /> | |
468 </conditional> | |
469 <assert_stdout> | |
470 <has_text_matching expression="sc.pp.subsample"/> | |
471 <has_text_matching expression="n_obs=10"/> | |
472 <has_text_matching expression="random_state=0"/> | |
473 </assert_stdout> | |
474 <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5" compare="sim_size"/> | |
475 </test> | |
476 </tests> | |
477 <help><![CDATA[ | |
478 | |
479 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`) | |
480 ======================================================================================== | |
481 | |
482 For instance, only keep cells with at least `min_counts` counts or | |
483 `min_genes` genes expressed. This is to filter measurement outliers, i.e., | |
484 "unreliable" observations. | |
485 | |
486 Only provide one of the optional parameters `min_counts`, `min_genes`, | |
487 `max_counts`, `max_genes` per call. | |
488 | |
489 More details on the `scanpy documentation | |
490 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_cells.html#scanpy.api.pp.filter_cells>`__ | |
491 | |
492 Return | |
493 ------ | |
494 | |
495 number_per_cell : Number per cell (either `n_counts` or `n_genes` per cell) | |
496 | |
497 | |
498 Filter genes based on number of cells or counts (`pp.filter_genes`) | |
499 =================================================================== | |
500 | |
501 Keep genes that have at least `min_counts` counts or are expressed in at | |
502 least `min_cells` cells or have at most `max_counts` counts or are expressed | |
503 in at most `max_cells` cells. | |
504 | |
505 Only provide one of the optional parameters `min_counts`, `min_cells`, | |
506 `max_counts`, `max_cells` per call. | |
507 | |
508 More details on the `scanpy documentation | |
509 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes.html#scanpy.api.pp.filter_genes>`__ | |
510 | |
511 Return | |
512 ------ | |
513 | |
514 number_per_gene : Number per gene (either `n_counts` or `n_cells` per gene) | |
515 | |
516 | |
517 Extract highly variable genes (`pp.filter_genes_dispersion`) | |
518 ============================================================ | |
519 | |
520 If trying out parameters, pass the data matrix instead of AnnData. | |
521 | |
522 Depending on `flavor`, this reproduces the R-implementations of Seurat and Cell Ranger. | |
523 | |
524 The normalized dispersion is obtained by scaling with the mean and standard | |
525 deviation of the dispersions for genes falling into a given bin for mean | |
526 expression of genes. This means that for each bin of mean expression, highly | |
527 variable genes are selected. | |
528 | |
529 Use `flavor='cell_ranger'` with care and in the same way as in `pp.recipe_zheng17`. | |
530 | |
531 More details on the `scanpy documentation | |
532 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes_dispersion.html#scanpy.api.pp.filter_genes_dispersion>`__ | |
533 | |
534 Returns | |
535 ------- | |
536 - The annotated matrix filtered, with the annotations | |
537 - A table with the means, dispersions, and normalized dispersions per gene, logarithmized when `log` is `True`. | |
538 | |
539 | |
540 Subsample to a fraction of the number of observations (`pp.subsample`) | |
541 ====================================================================== | |
542 | |
543 More details on the `scanpy documentation | |
544 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.subsample.html#scanpy.api.pp.subsample>`__ | |
545 | |
546 | |
547 ]]></help> | |
548 <expand macro="citations"/> | |
549 </tool> |