comparison filter.xml @ 1:6a76b60e05f5 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author iuc
date Wed, 16 Oct 2019 06:32:33 -0400
parents 6ea5a05a260a
children e62673c32a5d
comparison
equal deleted inserted replaced
0:6ea5a05a260a 1:6a76b60e05f5
1 <tool id="scanpy_filter" name="Filter with scanpy" version="@galaxy_version@"> 1 <tool id="scanpy_filter" name="Filter" version="@galaxy_version@">
2 <description></description> 2 <description>with scanpy</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"/> 6 <expand macro="requirements"/>
7 <expand macro="version_command"/> 7 <expand macro="version_command"/>
12 <configfile name="script_file"><![CDATA[ 12 <configfile name="script_file"><![CDATA[
13 @CMD_imports@ 13 @CMD_imports@
14 @CMD_read_inputs@ 14 @CMD_read_inputs@
15 15
16 #if $method.method == 'pp.filter_cells' 16 #if $method.method == 'pp.filter_cells'
17 res = sc.pp.filter_cells( 17 sc.pp.filter_cells(
18 #if $modify_anndata.modify_anndata == 'true'
19 adata, 18 adata,
20 #else
21 adata.X,
22 #end if
23 #if $method.filter.filter == 'min_counts' 19 #if $method.filter.filter == 'min_counts'
24 min_counts=$method.filter.min_counts, 20 min_counts=$method.filter.min_counts,
25 #elif $method.filter.filter == 'max_counts' 21 #else if $method.filter.filter == 'max_counts'
26 max_counts=$method.filter.max_counts, 22 max_counts=$method.filter.max_counts,
27 #elif $method.filter.filter == 'min_genes' 23 #else if $method.filter.filter == 'min_genes'
28 min_genes=$method.filter.min_genes, 24 min_genes=$method.filter.min_genes,
29 #elif $method.filter.filter == 'max_genes' 25 #else if $method.filter.filter == 'max_genes'
30 max_genes=$method.filter.max_genes, 26 max_genes=$method.filter.max_genes,
31 #end if 27 #end if
32 copy=False) 28 copy=False)
33 29
34 #if $modify_anndata.modify_anndata == 'true' 30 #else if $method.method == 'pp.filter_genes'
35 df = adata.obs 31 sc.pp.filter_genes(
36 #else
37 df = pd.DataFrame(data=dict(cell_subset=res[0], number_per_cell=res[1]))
38 #end if
39
40 #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts'
41 df.to_csv('$counts_per_cell', sep='\t')
42 #elif $method.filter.filter == 'min_genes' or $method.filter.filter == 'max_genes'
43 df.to_csv('$genes_per_cell', sep='\t')
44 #end if
45
46 #elif $method.method == 'pp.filter_genes'
47 res = sc.pp.filter_genes(
48 #if $modify_anndata.modify_anndata == 'true'
49 adata, 32 adata,
50 #else
51 adata.X,
52 #end if
53 #if $method.filter.filter == 'min_counts' 33 #if $method.filter.filter == 'min_counts'
54 min_counts=$method.filter.min_counts, 34 min_counts=$method.filter.min_counts,
55 #elif $method.filter.filter == 'max_counts' 35 #else if $method.filter.filter == 'max_counts'
56 max_counts=$method.filter.max_counts, 36 max_counts=$method.filter.max_counts,
57 #elif $method.filter.filter == 'min_cells' 37 #else if $method.filter.filter == 'min_cells'
58 min_cells=$method.filter.min_cells, 38 min_cells=$method.filter.min_cells,
59 #elif $method.filter.filter == 'max_cells' 39 #else if $method.filter.filter == 'max_cells'
60 max_cells=$method.filter.max_cells, 40 max_cells=$method.filter.max_cells,
61 #end if 41 #end if
62 copy=False) 42 copy=False)
63 43
64 #if $modify_anndata.modify_anndata == 'true' 44 #else if $method.method == 'tl.filter_rank_genes_groups'
65 df = adata.var 45 sc.tl.filter_rank_genes_groups(
66 #else
67 df = pd.DataFrame(data=dict(gene_subset=res[0], number_per_gene=res[1]))
68 #end if
69
70 #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts'
71 df.to_csv('$counts_per_gene', sep='\t')
72 #elif $method.filter.filter == 'min_cells' or $method.filter.filter == 'max_cells'
73 df.to_csv('$cells_per_gene', sep='\t')
74 #end if
75
76 #elif $method.method == 'pp.filter_genes_dispersion'
77 res = sc.pp.filter_genes_dispersion(
78 #if $modify_anndata.modify_anndata == 'true'
79 adata, 46 adata,
80 #else 47 #if str($method.key) != ''
81 adata.X, 48 key='$method.key',
82 #end if 49 #end if
50 #if str($method.groupby) != ''
51 groupby='$method.groupby',
52 #end if
53 use_raw=$method.use_raw,
54 log=$method.log,
55 key_added='$method.key_added',
56 min_in_group_fraction=$method.min_in_group_fraction,
57 max_out_group_fraction=$method.max_out_group_fraction,
58 min_fold_change=$method.min_fold_change)
59
60 #else if $method.method == "pp.highly_variable_genes"
61 sc.pp.highly_variable_genes(
62 adata=adata,
83 flavor='$method.flavor.flavor', 63 flavor='$method.flavor.flavor',
84 #if $method.flavor.flavor=='seurat' 64 #if $method.flavor.flavor == 'seurat'
65 #if str($method.flavor.min_mean) != ''
85 min_mean=$method.flavor.min_mean, 66 min_mean=$method.flavor.min_mean,
67 #end if
68 #if str($method.flavor.max_mean) != ''
86 max_mean=$method.flavor.max_mean, 69 max_mean=$method.flavor.max_mean,
70 #end if
71 #if str($method.flavor.min_disp) != ''
87 min_disp=$method.flavor.min_disp, 72 min_disp=$method.flavor.min_disp,
88 #if $method.flavor.max_disp 73 #end if
74 #if str($method.flavor.max_disp) != ''
89 max_disp=$method.flavor.max_disp, 75 max_disp=$method.flavor.max_disp,
90 #end if 76 #end if
91 #else 77 #else if $method.flavor.flavor == 'cell_ranger'
92 n_top_genes=$method.flavor.n_top_genes, 78 n_top_genes=$method.flavor.n_top_genes,
93 #end if 79 #end if
94 n_bins=$method.n_bins, 80 n_bins=$method.n_bins,
95 log=$method.log, 81 subset=$method.subset,
96 copy=False) 82 inplace=True)
97 83
98 #if $modify_anndata.modify_anndata == 'true' 84 #else if $method.method == 'pp.subsample'
99 adata.var.to_csv('$per_gene', sep='\t')
100 #else
101 pd.DataFrame(res).to_csv('$per_gene', sep='\t')
102 #end if
103
104 #elif $method.method == 'pp.subsample'
105 sc.pp.subsample( 85 sc.pp.subsample(
106 data=adata, 86 data=adata,
107 #if $method.type.type == 'fraction' 87 #if $method.type.type == 'fraction'
108 fraction=$method.type.fraction, 88 fraction=$method.type.fraction,
109 #else if $method.type.type == 'n_obs' 89 #else if $method.type.type == 'n_obs'
110 n_obs=$method.type.n_obs, 90 n_obs=$method.type.n_obs,
111 #end if 91 #end if
112 random_state=$method.random_state, 92 random_state=$method.random_state,
113 copy=False) 93 copy=False)
114 94
95 #else if $method.method == "pp.downsample_counts"
96 sc.pp.downsample_counts(
97 adata=adata,
98 #if str($method.counts_per_cell) != ''
99 counts_per_cell=$method.counts_per_cell,
100 #end if
101 #if str($method.total_counts) != ''
102 total_counts=$method.total_counts,
103 #end if
104 random_state=$method.random_state,
105 replace=$method.replace,
106 copy=False)
115 #end if 107 #end if
116 108
117 @CMD_anndata_write_modify_outputs@ 109 @CMD_anndata_write_outputs@
118 ]]></configfile> 110 ]]></configfile>
119 </configfiles> 111 </configfiles>
120 <inputs> 112 <inputs>
121 <expand macro="inputs_anndata"/> 113 <expand macro="inputs_anndata"/>
122 <conditional name="method"> 114 <conditional name="method">
123 <param argument="method" type="select" label="Method used for filtering"> 115 <param argument="method" type="select" label="Method used for filtering">
124 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`</option> 116 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`</option>
125 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using `pp.filter_genes`</option> 117 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using `pp.filter_genes`</option>
126 <option value="pp.filter_genes_dispersion">Extract highly variable genes, using `pp.filter_genes_dispersion`</option> 118 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using `tl.filter_rank_genes_groups`</option>
127 <!--<option value="pp.highly_variable_genes">, using `tl.highly_variable_genes`</option>!--> 119 <option value="pp.highly_variable_genes">, using `tl.highly_variable_genes`</option>
128 <option value="pp.subsample">Subsample to a fraction of the number of observations, using `pp.subsample`</option> 120 <option value="pp.subsample">Subsample to a fraction of the number of observations, using `pp.subsample`</option>
129 <!--<option value="queries.gene_coordinates">, using `queries.gene_coordinates`</option>!--> 121 <option value="pp.downsample_counts">Downsample counts from count matrix, using `pp.downsample_counts`</option>
130 <!--<option value="queries.mitochondrial_genes">, using `queries.mitochondrial_genes`</option>!-->
131 </param> 122 </param>
132 <when value="pp.filter_cells"> 123 <when value="pp.filter_cells">
133 <conditional name="filter"> 124 <conditional name="filter">
134 <param argument="filter" type="select" label="Filter"> 125 <param argument="filter" type="select" label="Filter">
135 <option value="min_counts">Minimum number of counts</option> 126 <option value="min_counts">Minimum number of counts</option>
158 <option value="max_counts">Maximum number of counts</option> 149 <option value="max_counts">Maximum number of counts</option>
159 <option value="min_cells">Minimum number of cells expressed</option> 150 <option value="min_cells">Minimum number of cells expressed</option>
160 <option value="max_cells">Maximum number of cells expressed</option> 151 <option value="max_cells">Maximum number of cells expressed</option>
161 </param> 152 </param>
162 <when value="min_counts"> 153 <when value="min_counts">
163 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering" help=""/> 154 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/>
164 </when> 155 </when>
165 <when value="max_counts"> 156 <when value="max_counts">
166 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering" help=""/> 157 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/>
167 </when> 158 </when>
168 <when value="min_cells"> 159 <when value="min_cells">
169 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering" help=""/> 160 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/>
170 </when> 161 </when>
171 <when value="max_cells"> 162 <when value="max_cells">
172 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering" help=""/> 163 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/>
173 </when> 164 </when>
174 </conditional> 165 </conditional>
175 </when> 166 </when>
176 <when value="pp.filter_genes_dispersion"> 167 <when value="tl.filter_rank_genes_groups">
168 <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored"/>
169 <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider"/>
170 <expand macro="param_use_raw"/>
171 <expand macro="param_log"/>
172 <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values"/>
173 <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of genes expressing the gene within the categories"/>
174 <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of genes expressing the gene outside the categories"/>
175 <param argument="min_fold_change" type="integer" value="2" label="Minimum fold change"/>
176 </when>
177 <when value="pp.highly_variable_genes">
177 <conditional name='flavor'> 178 <conditional name='flavor'>
178 <param argument="flavor" type="select" label="Flavor for computing normalized dispersion" help=""> 179 <param argument="flavor" type="select" label="Flavor for computing normalized dispersion">
179 <option value="seurat">seurat: expects non-logarithmized data</option> 180 <option value="seurat">seurat: expects non-logarithmized data</option>
180 <option value="cell_ranger">cell_ranger: usually called for logarithmized data</option> 181 <option value="cell_ranger">cell_ranger: usually called for logarithmized data</option>
181 </param> 182 </param>
182 <when value="seurat"> 183 <when value="seurat">
183 <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff" help=""/> 184 <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff"/>
184 <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff" help=""/> 185 <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff"/>
185 <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff" help=""/> 186 <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff"/>
186 <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff" help=""/> 187 <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff"/>
187 </when> 188 </when>
188 <when value="cell_ranger"> 189 <when value="cell_ranger">
189 <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep" help=""/> 190 <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep"/>
190 </when> 191 </when>
191 </conditional> 192 </conditional>
192 <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/> 193 <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/>
193 <expand macro="param_log"/> 194 <param argument="subset" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Inplace subset to highly-variable genes?" help="Otherwise it merely indicates highly variable genes."/>
194 </when> 195 </when>
195 <when value="pp.subsample"> 196 <when value="pp.subsample">
196 <conditional name="type"> 197 <conditional name="type">
197 <param name="type" type="select" label="Type of subsampling"> 198 <param name="type" type="select" label="Type of subsampling">
198 <option value="fraction">By fraction</option> 199 <option value="fraction">By fraction</option>
199 <option value="n_obs">By number of observation</option> 200 <option value="n_obs">By number of observation</option>
200 </param> 201 </param>
201 <when value="fraction"> 202 <when value="fraction">
202 <param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations" help=""/> 203 <param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations"/>
203 </when> 204 </when>
204 <when value="n_obs"> 205 <when value="n_obs">
205 <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations" help=""/> 206 <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations"/>
206 </when> 207 </when>
207 </conditional> 208 </conditional>
208 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/> 209 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/>
210 </when>
211 <when value="pp.downsample_counts">
212 <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than ‘counts_per_cell’, it will be downsampled to this number. Resulting counts can be specified on a per cell basis by passing an array."/>
213 <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/>
214 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/>
215 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/>
209 </when> 216 </when>
210 </conditional> 217 </conditional>
211 <expand macro="anndata_modify_output_input"/>
212 </inputs> 218 </inputs>
213 <outputs> 219 <outputs>
214 <expand macro="anndata_modify_outputs"/> 220 <expand macro="anndata_outputs"/>
215 <!-- for pp.filter_cells -->
216 <data name="counts_per_cell" format="tabular" label="${tool.name} on ${on_string}: Counts per cells after filtering">
217 <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter>
218 </data>
219 <data name="genes_per_cell" format="tabular" label="${tool.name} on ${on_string}: Number of genes per cell after filtering">
220 <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_genes' or method['filter']['filter'] == 'max_genes')</filter>
221 </data>
222 <!-- for pp.filter_genes -->
223 <data name="counts_per_gene" format="tabular" label="${tool.name} on ${on_string}: Counts per genes after filtering">
224 <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter>
225 </data>
226 <data name="cells_per_gene" format="tabular" label="${tool.name} on ${on_string}: Number of cells per genes after filtering">
227 <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_cells' or method['filter']['filter'] == 'max_cells')</filter>
228 </data>
229 <!-- for pp.filter_genes_dispersion -->
230 <data name="per_gene" format="tabular" label="${tool.name} on ${on_string}: Means, dispersions and normalized dispersions per gene">
231 <filter>method['method'] == 'pp.filter_genes_dispersion'</filter>
232 </data>
233 </outputs> 221 </outputs>
234 <tests> 222 <tests>
235 <test expect_num_outputs="2"> 223 <test>
236 <conditional name="input"> 224 <!-- test 1 -->
237 <param name="format" value="h5ad" /> 225 <param name="adata" value="krumsiek11.h5ad" />
238 <param name="adata" value="krumsiek11.h5ad" />
239 </conditional>
240 <conditional name="method"> 226 <conditional name="method">
241 <param name="method" value="pp.filter_cells"/> 227 <param name="method" value="pp.filter_cells"/>
242 <conditional name="filter"> 228 <conditional name="filter">
243 <param name="filter" value="min_counts"/> 229 <param name="filter" value="min_counts"/>
244 <param name="min_counts" value="3"/> 230 <param name="min_counts" value="3"/>
245 </conditional> 231 </conditional>
246 </conditional> 232 </conditional>
247 <conditional name="modify_anndata">
248 <param name="modify_anndata" value="true"/>
249 <param name="anndata_output_format" value="h5ad" />
250 </conditional>
251 <assert_stdout> 233 <assert_stdout>
252 <has_text_matching expression="sc.pp.filter_cells"/> 234 <has_text_matching expression="sc.pp.filter_cells"/>
253 <has_text_matching expression="min_counts=3"/> 235 <has_text_matching expression="min_counts=3"/>
254 </assert_stdout> 236 </assert_stdout>
255 <output name="anndata_out_h5ad" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> 237 <output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/>
256 <output name="counts_per_cell"> 238 </test>
257 <assert_contents> 239 <test>
258 <has_text_matching expression="cell_type\tn_counts" /> 240 <!-- test 2 -->
259 <has_text_matching expression="46\tprogenitor\t3.028" /> 241 <param name="adata" value="krumsiek11.h5ad" />
260 <has_text_matching expression="85\tEry\t3.7001" />
261 <has_text_matching expression="150\tMk\t4.095" />
262 <has_n_columns n="3" />
263 </assert_contents>
264 </output>
265 </test>
266 <test expect_num_outputs="2">
267 <conditional name="input">
268 <param name="format" value="loom" />
269 <param name="adata" value="krumsiek11.loom" />
270 <param name="sparse" value="True"/>
271 <param name="cleanup" value="False"/>
272 <param name="x_name" value="spliced"/>
273 <param name="obs_names" value="CellID" />
274 <param name="var_names" value="Gene"/>
275 </conditional>
276 <conditional name="method">
277 <param name="method" value="pp.filter_cells"/>
278 <conditional name="filter">
279 <param name="filter" value="min_counts"/>
280 <param name="min_counts" value="3"/>
281 </conditional>
282 </conditional>
283 <conditional name="modify_anndata">
284 <param name="modify_anndata" value="true"/>
285 <param name="anndata_output_format" value="loom" />
286 </conditional>
287 <assert_stdout>
288 <has_text_matching expression="sc.pp.filter_cells"/>
289 <has_text_matching expression="min_counts=3"/>
290 </assert_stdout>
291 <output name="anndata_out_loom" file="pp.filter_cells.krumsiek11-min_counts.loom" ftype="loom" compare="sim_size"/>
292 <output name="counts_per_cell">
293 <assert_contents>
294 <has_text_matching expression="cell_type\tn_counts" />
295 <has_text_matching expression="46\tprogenitor\t3.028" />
296 <has_text_matching expression="85\tEry\t3.7001" />
297 <has_text_matching expression="97\tMo\t3.925" />
298 <has_text_matching expression="150\tMk\t4.095" />
299 <has_n_columns n="3" />
300 </assert_contents>
301 </output>
302 </test>
303 <test expect_num_outputs="1">
304 <conditional name="input">
305 <param name="format" value="h5ad" />
306 <param name="adata" value="krumsiek11.h5ad"/>
307 </conditional>
308 <conditional name="method"> 242 <conditional name="method">
309 <param name="method" value="pp.filter_cells"/> 243 <param name="method" value="pp.filter_cells"/>
310 <conditional name="filter"> 244 <conditional name="filter">
311 <param name="filter" value="max_genes"/> 245 <param name="filter" value="max_genes"/>
312 <param name="max_genes" value="100"/> 246 <param name="max_genes" value="100"/>
313 </conditional> 247 </conditional>
314 </conditional> 248 </conditional>
315 <conditional name="modify_anndata">
316 <param name="modify_anndata" value="false"/>
317 </conditional>
318 <assert_stdout> 249 <assert_stdout>
319 <has_text_matching expression="sc.pp.filter_cells"/> 250 <has_text_matching expression="sc.pp.filter_cells"/>
320 <has_text_matching expression="adata.X"/> 251 <has_text_matching expression="adata"/>
321 <has_text_matching expression="max_genes=100"/> 252 <has_text_matching expression="max_genes=100"/>
322 </assert_stdout> 253 </assert_stdout>
323 <output name="genes_per_cell" file="pp.filter_cells.number_per_cell.krumsiek11-max_genes.tabular"/> 254 <output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/>
324 </test> 255 </test>
325 <test expect_num_outputs="2"> 256 <test>
326 <conditional name="input"> 257 <!-- test 3 -->
327 <param name="format" value="h5ad" /> 258 <param name="adata" value="krumsiek11.h5ad" />
328 <param name="adata" value="krumsiek11.h5ad" />
329 </conditional>
330 <conditional name="method"> 259 <conditional name="method">
331 <param name="method" value="pp.filter_genes"/> 260 <param name="method" value="pp.filter_genes"/>
332 <conditional name="filter"> 261 <conditional name="filter">
333 <param name="filter" value="min_counts"/> 262 <param name="filter" value="min_counts"/>
334 <param name="min_counts" value="3"/> 263 <param name="min_counts" value="3"/>
335 </conditional> 264 </conditional>
336 </conditional> 265 </conditional>
337 <conditional name="modify_anndata">
338 <param name="modify_anndata" value="true"/>
339 <param name="anndata_output_format" value="h5ad" />
340 </conditional>
341 <assert_stdout> 266 <assert_stdout>
342 <has_text_matching expression="sc.pp.filter_genes"/> 267 <has_text_matching expression="sc.pp.filter_genes"/>
343 <has_text_matching expression="min_counts=3"/> 268 <has_text_matching expression="min_counts=3"/>
344 </assert_stdout> 269 </assert_stdout>
345 <output name="anndata_out_h5ad" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> 270 <output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/>
346 <output name="counts_per_gene" file="pp.filter_genes.number_per_gene.krumsiek11-min_counts.tabular"/> 271 </test>
347 </test> 272 <test>
348 <test expect_num_outputs="1"> 273 <!-- test 4 -->
349 <conditional name="input"> 274 <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" />
350 <param name="format" value="h5ad" /> 275 <conditional name="method">
351 <param name="adata" value="pbmc68k_reduced.h5ad"/> 276 <param name="method" value="tl.filter_rank_genes_groups"/>
352 </conditional> 277 <param name="key" value="rank_genes_groups"/>
353 <conditional name="method"> 278 <param name="use_raw" value="False"/>
354 <param name="method" value="pp.filter_genes"/> 279 <param name="log" value="False"/>
355 <conditional name="filter"> 280 <param name="key_added" value="rank_genes_groups_filtered"/>
356 <param name="filter" value="max_cells"/> 281 <param name="min_in_group_fraction" value="0.25"/>
357 <param name="max_cells" value="500"/> 282 <param name="max_out_group_fraction" value="0.5"/>
358 </conditional> 283 <param name="min_fold_change" value="3"/>
359 </conditional> 284 </conditional>
360 <conditional name="modify_anndata"> 285 <assert_stdout>
361 <param name="modify_anndata" value="false"/> 286 <has_text_matching expression="tl.filter_rank_genes_groups"/>
362 </conditional> 287 <has_text_matching expression="key='rank_genes_groups'"/>
363 <assert_stdout> 288 <has_text_matching expression="use_raw=False"/>
364 <has_text_matching expression="sc.pp.filter_genes"/> 289 <has_text_matching expression="log=False"/>
365 <has_text_matching expression="adata.X"/> 290 <has_text_matching expression="key_added='rank_genes_groups_filtered'"/>
366 <has_text_matching expression="max_cells=500"/> 291 <has_text_matching expression="min_in_group_fraction=0.25"/>
367 </assert_stdout> 292 <has_text_matching expression="max_out_group_fraction=0.5"/>
368 <output name="cells_per_gene" file="pp.filter_genes.number_per_gene.pbmc68k_reduced-max_cells.tabular"/> 293 <has_text_matching expression="min_fold_change=3"/>
369 </test> 294 </assert_stdout>
370 <test expect_num_outputs="2"> 295 <output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/>
371 <conditional name="input"> 296 </test>
372 <param name="format" value="h5ad" /> 297 <test>
373 <param name="adata" value="krumsiek11.h5ad" /> 298 <!-- test 5 -->
374 </conditional> 299 <param name="adata" value="blobs.h5ad"/>
375 <conditional name="method"> 300 <conditional name="method">
376 <param name="method" value="pp.filter_genes_dispersion"/> 301 <param name="method" value="pp.highly_variable_genes"/>
377 <conditional name="flavor"> 302 <conditional name="flavor">
378 <param name="flavor" value="seurat"/> 303 <param name="flavor" value="seurat"/>
379 <param name="min_mean" value="0.0125"/> 304 <param name="min_mean" value="0.0125"/>
380 <param name="max_mean" value="3"/> 305 <param name="max_mean" value="3"/>
381 <param name="min_disp" value="0.5"/> 306 <param name="min_disp" value="0.5"/>
382 </conditional> 307 </conditional>
383 <param name="n_bins" value="20" /> 308 <param name="n_bins" value="20"/>
384 <param name="log" value="true"/> 309 <param name="subset" value="false"/>
385 </conditional> 310 </conditional>
386 <conditional name="modify_anndata"> 311 <assert_stdout>
387 <param name="modify_anndata" value="true"/> 312 <has_text_matching expression="sc.pp.highly_variable_genes"/>
388 <param name="anndata_output_format" value="h5ad" />
389 </conditional>
390 <assert_stdout>
391 <has_text_matching expression="sc.pp.filter_genes_dispersion"/>
392 <has_text_matching expression="flavor='seurat'"/> 313 <has_text_matching expression="flavor='seurat'"/>
393 <has_text_matching expression="min_mean=0.0125"/> 314 <has_text_matching expression="min_mean=0.0125"/>
394 <has_text_matching expression="max_mean=3.0"/> 315 <has_text_matching expression="max_mean=3"/>
395 <has_text_matching expression="min_disp=0.5"/> 316 <has_text_matching expression="min_disp=0.5"/>
396 <has_text_matching expression="n_bins=20"/> 317 <has_text_matching expression="n_bins=20"/>
397 <has_text_matching expression="log=True"/> 318 <has_text_matching expression="subset=False"/>
398 </assert_stdout> 319 </assert_stdout>
399 <output name="anndata_out_h5ad" file="pp.filter_genes_dispersion.krumsiek11-seurat.h5ad" ftype="h5" compare="sim_size"/> 320 <output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size"/>
400 <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-seurat.tabular"/> 321 </test>
401 </test> 322 <test>
402 <test expect_num_outputs="1"> 323 <!-- test 6 -->
403 <conditional name="input"> 324 <param name="adata" value="krumsiek11.h5ad" />
404 <param name="format" value="h5ad" /> 325 <conditional name="method">
405 <param name="adata" value="krumsiek11.h5ad" /> 326 <param name="method" value="pp.highly_variable_genes"/>
406 </conditional>
407 <conditional name="method">
408 <param name="method" value="pp.filter_genes_dispersion"/>
409 <conditional name="flavor"> 327 <conditional name="flavor">
410 <param name="flavor" value="cell_ranger"/> 328 <param name="flavor" value="cell_ranger"/>
411 <param name="n_top_genes" value="2"/> 329 <param name="n_top_genes" value="2"/>
412 </conditional> 330 </conditional>
413 <param name="n_bins" value="20"/> 331 <param name="n_bins" value="20"/>
414 <param name="log" value="true"/> 332 </conditional>
415 </conditional> 333 <assert_stdout>
416 <conditional name="modify_anndata"> 334 <has_text_matching expression="sc.pp.highly_variable_genes"/>
417 <param name="modify_anndata" value="false"/>
418 </conditional>
419 <assert_stdout>
420 <has_text_matching expression="sc.pp.filter_genes_dispersion"/>
421 <has_text_matching expression="flavor='cell_ranger'"/> 335 <has_text_matching expression="flavor='cell_ranger'"/>
422 <has_text_matching expression="n_top_genes=2"/> 336 <has_text_matching expression="n_top_genes=2"/>
423 <has_text_matching expression="n_bins=20"/> 337 <has_text_matching expression="n_bins=20"/>
424 <has_text_matching expression="og=True"/> 338 </assert_stdout>
425 </assert_stdout> 339 <output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size"/>
426 <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-cell_ranger.tabular"/> 340 </test>
427 </test> 341 <test>
428 <test expect_num_outputs="1"> 342 <!-- test 7 -->
429 <conditional name="input"> 343 <param name="adata" value="krumsiek11.h5ad" />
430 <param name="format" value="h5ad" />
431 <param name="adata" value="krumsiek11.h5ad" />
432 </conditional>
433 <conditional name="method"> 344 <conditional name="method">
434 <param name="method" value="pp.subsample"/> 345 <param name="method" value="pp.subsample"/>
435 <conditional name="type"> 346 <conditional name="type">
436 <param name="type" value="fraction" /> 347 <param name="type" value="fraction" />
437 <param name="fraction" value="0.5"/> 348 <param name="fraction" value="0.5"/>
438 </conditional> 349 </conditional>
439 <param name="random_state" value="0"/> 350 <param name="random_state" value="0"/>
440 </conditional> 351 </conditional>
441 <conditional name="modify_anndata">
442 <param name="modify_anndata" value="true"/>
443 <param name="anndata_output_format" value="h5ad" />
444 </conditional>
445 <assert_stdout> 352 <assert_stdout>
446 <has_text_matching expression="sc.pp.subsample"/> 353 <has_text_matching expression="sc.pp.subsample"/>
447 <has_text_matching expression="fraction=0.5"/> 354 <has_text_matching expression="fraction=0.5"/>
448 <has_text_matching expression="random_state=0"/> 355 <has_text_matching expression="random_state=0"/>
449 </assert_stdout> 356 </assert_stdout>
450 <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5" compare="sim_size"/> 357 <output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/>
451 </test> 358 </test>
452 <test expect_num_outputs="1"> 359 <test>
453 <conditional name="input"> 360 <!-- test 8 -->
454 <param name="format" value="h5ad" /> 361 <param name="adata" value="krumsiek11.h5ad" />
455 <param name="adata" value="krumsiek11.h5ad" />
456 </conditional>
457 <conditional name="method"> 362 <conditional name="method">
458 <param name="method" value="pp.subsample"/> 363 <param name="method" value="pp.subsample"/>
459 <conditional name="type"> 364 <conditional name="type">
460 <param name="type" value="n_obs" /> 365 <param name="type" value="n_obs" />
461 <param name="n_obs" value="10"/> 366 <param name="n_obs" value="10"/>
462 </conditional> 367 </conditional>
463 <param name="random_state" value="0"/> 368 <param name="random_state" value="0"/>
464 </conditional> 369 </conditional>
465 <conditional name="modify_anndata">
466 <param name="modify_anndata" value="true"/>
467 <param name="anndata_output_format" value="h5ad" />
468 </conditional>
469 <assert_stdout> 370 <assert_stdout>
470 <has_text_matching expression="sc.pp.subsample"/> 371 <has_text_matching expression="sc.pp.subsample"/>
471 <has_text_matching expression="n_obs=10"/> 372 <has_text_matching expression="n_obs=10"/>
472 <has_text_matching expression="random_state=0"/> 373 <has_text_matching expression="random_state=0"/>
473 </assert_stdout> 374 </assert_stdout>
474 <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5" compare="sim_size"/> 375 <output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/>
376 </test>
377 <test>
378 <!-- test 9 -->
379 <param name="adata" value="random-randint.h5ad" />
380 <conditional name="method">
381 <param name="method" value="pp.downsample_counts"/>
382 <param name="total_counts" value="20000"/>
383 <param name="random_state" value="0"/>
384 <param name="replace" value="false"/>
385 </conditional>
386 <assert_stdout>
387 <has_text_matching expression="sc.pp.downsample_counts"/>
388 <has_text_matching expression="total_counts=20000"/>
389 <has_text_matching expression="random_state=0"/>
390 <has_text_matching expression="replace=False"/>
391 </assert_stdout>
392 <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size"/>
475 </test> 393 </test>
476 </tests> 394 </tests>
477 <help><![CDATA[ 395 <help><![CDATA[
478 396
479 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`) 397 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`)
485 403
486 Only provide one of the optional parameters `min_counts`, `min_genes`, 404 Only provide one of the optional parameters `min_counts`, `min_genes`,
487 `max_counts`, `max_genes` per call. 405 `max_counts`, `max_genes` per call.
488 406
489 More details on the `scanpy documentation 407 More details on the `scanpy documentation
490 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_cells.html#scanpy.api.pp.filter_cells>`__ 408 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.filter_cells.html>`__
491
492 Return
493 ------
494
495 number_per_cell : Number per cell (either `n_counts` or `n_genes` per cell)
496 409
497 410
498 Filter genes based on number of cells or counts (`pp.filter_genes`) 411 Filter genes based on number of cells or counts (`pp.filter_genes`)
499 =================================================================== 412 ===================================================================
500 413
504 417
505 Only provide one of the optional parameters `min_counts`, `min_cells`, 418 Only provide one of the optional parameters `min_counts`, `min_cells`,
506 `max_counts`, `max_cells` per call. 419 `max_counts`, `max_cells` per call.
507 420
508 More details on the `scanpy documentation 421 More details on the `scanpy documentation
509 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes.html#scanpy.api.pp.filter_genes>`__ 422 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.filter_genes.html>`__
510 423
511 Return 424
512 ------ 425 Filters out genes based on fold change and fraction of cells expressing the gene within and outside the groupby categories (`tl.filter_rank_genes_groups`)
513 426 ==========================================================================================================================================================
514 number_per_gene : Number per genes (either `n_counts` or `n_genes` per cell)
515
516
517 Extract highly variable genes (`pp.filter_genes_dispersion`)
518 ============================================================
519
520 If trying out parameters, pass the data matrix instead of AnnData.
521
522 Depending on `flavor`, this reproduces the R-implementations of Seurat and Cell Ranger.
523
524 The normalized dispersion is obtained by scaling with the mean and standard
525 deviation of the dispersions for genes falling into a given bin for mean
526 expression of genes. This means that for each bin of mean expression, highly
527 variable genes are selected.
528
529 Use `flavor='cell_ranger'` with care and in the same way as in `pp.recipe_zheng17`.
530 427
531 More details on the `scanpy documentation 428 More details on the `scanpy documentation
532 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes_dispersion.html#scanpy.api.pp.filter_genes_dispersion>`__ 429 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.filter_rank_genes_groups.html>`__
533 430
534 Returns 431
535 ------- 432 Annotate highly variable genes (`pp.highly_variable_genes`)
536 - The annotated matrix filtered, with the annotations 433 ===========================================================
537 - A table with the means, dispersions, and normalized dispersions per gene, logarithmized when `log` is `True`. 434
435 It expects logarithmized data.
436
437 Depending on flavor, this reproduces the R-implementations of Seurat or Cell Ranger. The normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected.
538 438
539 439
540 Subsample to a fraction of the number of observations (`pp.subsample`) 440 Subsample to a fraction of the number of observations (`pp.subsample`)
541 ====================================================================== 441 ======================================================================
542 442
543 More details on the `scanpy documentation 443 More details on the `scanpy documentation
544 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.subsample.html#scanpy.api.pp.subsample>`__ 444 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.subsample.html>`__
445
446 Downsample counts (`pp.downsample_counts`)
447 ==========================================
448
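449 Downsample counts from the count matrix. If `counts_per_cell` is given, each cell is downsampled to at most that many counts; cells with fewer counts are unaffected. If `total_counts` is given, the whole matrix is downsampled to contain at most `total_counts` counts. This
450 has been implemented by M. D. Luecken.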
451
452 More details on the `scanpy documentation
453 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.downsample_counts.html>`__
545 454
546 455
547 ]]></help> 456 ]]></help>
548 <expand macro="citations"/> 457 <expand macro="citations"/>
549 </tool> 458 </tool>