Mercurial > repos > iuc > scanpy_filter
comparison filter.xml @ 1:6a76b60e05f5 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author | iuc |
---|---|
date | Wed, 16 Oct 2019 06:32:33 -0400 |
parents | 6ea5a05a260a |
children | e62673c32a5d |
comparison
equal
deleted
inserted
replaced
0:6ea5a05a260a | 1:6a76b60e05f5 |
---|---|
1 <tool id="scanpy_filter" name="Filter with scanpy" version="@galaxy_version@"> | 1 <tool id="scanpy_filter" name="Filter" version="@galaxy_version@"> |
2 <description></description> | 2 <description>with scanpy</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements"/> | 6 <expand macro="requirements"/> |
7 <expand macro="version_command"/> | 7 <expand macro="version_command"/> |
12 <configfile name="script_file"><![CDATA[ | 12 <configfile name="script_file"><![CDATA[ |
13 @CMD_imports@ | 13 @CMD_imports@ |
14 @CMD_read_inputs@ | 14 @CMD_read_inputs@ |
15 | 15 |
16 #if $method.method == 'pp.filter_cells' | 16 #if $method.method == 'pp.filter_cells' |
17 res = sc.pp.filter_cells( | 17 sc.pp.filter_cells( |
18 #if $modify_anndata.modify_anndata == 'true' | |
19 adata, | 18 adata, |
20 #else | |
21 adata.X, | |
22 #end if | |
23 #if $method.filter.filter == 'min_counts' | 19 #if $method.filter.filter == 'min_counts' |
24 min_counts=$method.filter.min_counts, | 20 min_counts=$method.filter.min_counts, |
25 #elif $method.filter.filter == 'max_counts' | 21 #else if $method.filter.filter == 'max_counts' |
26 max_counts=$method.filter.max_counts, | 22 max_counts=$method.filter.max_counts, |
27 #elif $method.filter.filter == 'min_genes' | 23 #else if $method.filter.filter == 'min_genes' |
28 min_genes=$method.filter.min_genes, | 24 min_genes=$method.filter.min_genes, |
29 #elif $method.filter.filter == 'max_genes' | 25 #else if $method.filter.filter == 'max_genes' |
30 max_genes=$method.filter.max_genes, | 26 max_genes=$method.filter.max_genes, |
31 #end if | 27 #end if |
32 copy=False) | 28 copy=False) |
33 | 29 |
34 #if $modify_anndata.modify_anndata == 'true' | 30 #else if $method.method == 'pp.filter_genes' |
35 df = adata.obs | 31 sc.pp.filter_genes( |
36 #else | |
37 df = pd.DataFrame(data=dict(cell_subset=res[0], number_per_cell=res[1])) | |
38 #end if | |
39 | |
40 #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts' | |
41 df.to_csv('$counts_per_cell', sep='\t') | |
42 #elif $method.filter.filter == 'min_genes' or $method.filter.filter == 'max_genes' | |
43 df.to_csv('$genes_per_cell', sep='\t') | |
44 #end if | |
45 | |
46 #elif $method.method == 'pp.filter_genes' | |
47 res = sc.pp.filter_genes( | |
48 #if $modify_anndata.modify_anndata == 'true' | |
49 adata, | 32 adata, |
50 #else | |
51 adata.X, | |
52 #end if | |
53 #if $method.filter.filter == 'min_counts' | 33 #if $method.filter.filter == 'min_counts' |
54 min_counts=$method.filter.min_counts, | 34 min_counts=$method.filter.min_counts, |
55 #elif $method.filter.filter == 'max_counts' | 35 #else if $method.filter.filter == 'max_counts' |
56 max_counts=$method.filter.max_counts, | 36 max_counts=$method.filter.max_counts, |
57 #elif $method.filter.filter == 'min_cells' | 37 #else if $method.filter.filter == 'min_cells' |
58 min_cells=$method.filter.min_cells, | 38 min_cells=$method.filter.min_cells, |
59 #elif $method.filter.filter == 'max_cells' | 39 #else if $method.filter.filter == 'max_cells' |
60 max_cells=$method.filter.max_cells, | 40 max_cells=$method.filter.max_cells, |
61 #end if | 41 #end if |
62 copy=False) | 42 copy=False) |
63 | 43 |
64 #if $modify_anndata.modify_anndata == 'true' | 44 #else if $method.method == 'tl.filter_rank_genes_groups' |
65 df = adata.var | 45 sc.tl.filter_rank_genes_groups( |
66 #else | |
67 df = pd.DataFrame(data=dict(gene_subset=res[0], number_per_gene=res[1])) | |
68 #end if | |
69 | |
70 #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts' | |
71 df.to_csv('$counts_per_gene', sep='\t') | |
72 #elif $method.filter.filter == 'min_cells' or $method.filter.filter == 'max_cells' | |
73 df.to_csv('$cells_per_gene', sep='\t') | |
74 #end if | |
75 | |
76 #elif $method.method == 'pp.filter_genes_dispersion' | |
77 res = sc.pp.filter_genes_dispersion( | |
78 #if $modify_anndata.modify_anndata == 'true' | |
79 adata, | 46 adata, |
80 #else | 47 #if str($method.key) != '' |
81 adata.X, | 48 key='$method.key', |
82 #end if | 49 #end if |
50 #if str($method.groupby) != '' | |
51 groupby='$method.groupby', | |
52 #end if | |
53 use_raw=$method.use_raw, | |
54 log=$method.log, | |
55 key_added='$method.key_added', | |
56 min_in_group_fraction=$method.min_in_group_fraction, | |
57 max_out_group_fraction=$method.max_out_group_fraction, | |
58 min_fold_change=$method.min_fold_change) | |
59 | |
60 #else if $method.method == "pp.highly_variable_genes" | |
61 sc.pp.highly_variable_genes( | |
62 adata=adata, | |
83 flavor='$method.flavor.flavor', | 63 flavor='$method.flavor.flavor', |
84 #if $method.flavor.flavor=='seurat' | 64 #if $method.flavor.flavor == 'seurat' |
65 #if str($method.flavor.min_mean) != '' | |
85 min_mean=$method.flavor.min_mean, | 66 min_mean=$method.flavor.min_mean, |
67 #end if | |
68 #if str($method.flavor.max_mean) != '' | |
86 max_mean=$method.flavor.max_mean, | 69 max_mean=$method.flavor.max_mean, |
70 #end if | |
71 #if str($method.flavor.min_disp) != '' | |
87 min_disp=$method.flavor.min_disp, | 72 min_disp=$method.flavor.min_disp, |
88 #if $method.flavor.max_disp | 73 #end if |
74 #if str($method.flavor.max_disp) != '' | |
89 max_disp=$method.flavor.max_disp, | 75 max_disp=$method.flavor.max_disp, |
90 #end if | 76 #end if |
91 #else | 77 #else if $method.flavor.flavor == 'cell_ranger' |
92 n_top_genes=$method.flavor.n_top_genes, | 78 n_top_genes=$method.flavor.n_top_genes, |
93 #end if | 79 #end if |
94 n_bins=$method.n_bins, | 80 n_bins=$method.n_bins, |
95 log=$method.log, | 81 subset=$method.subset, |
96 copy=False) | 82 inplace=True) |
97 | 83 |
98 #if $modify_anndata.modify_anndata == 'true' | 84 #else if $method.method == 'pp.subsample' |
99 adata.var.to_csv('$per_gene', sep='\t') | |
100 #else | |
101 pd.DataFrame(res).to_csv('$per_gene', sep='\t') | |
102 #end if | |
103 | |
104 #elif $method.method == 'pp.subsample' | |
105 sc.pp.subsample( | 85 sc.pp.subsample( |
106 data=adata, | 86 data=adata, |
107 #if $method.type.type == 'fraction' | 87 #if $method.type.type == 'fraction' |
108 fraction=$method.type.fraction, | 88 fraction=$method.type.fraction, |
109 #else if $method.type.type == 'n_obs' | 89 #else if $method.type.type == 'n_obs' |
110 n_obs=$method.type.n_obs, | 90 n_obs=$method.type.n_obs, |
111 #end if | 91 #end if |
112 random_state=$method.random_state, | 92 random_state=$method.random_state, |
113 copy=False) | 93 copy=False) |
114 | 94 |
95 #else if $method.method == "pp.downsample_counts" | |
96 sc.pp.downsample_counts( | |
97 adata=adata, | |
98 #if str($method.counts_per_cell) != '' | |
99 counts_per_cell=$method.counts_per_cell, | |
100 #end if | |
101 #if str($method.total_counts) != '' | |
102 total_counts=$method.total_counts, | |
103 #end if | |
104 random_state=$method.random_state, | |
105 replace=$method.replace, | |
106 copy=False) | |
115 #end if | 107 #end if |
116 | 108 |
117 @CMD_anndata_write_modify_outputs@ | 109 @CMD_anndata_write_outputs@ |
118 ]]></configfile> | 110 ]]></configfile> |
119 </configfiles> | 111 </configfiles> |
120 <inputs> | 112 <inputs> |
121 <expand macro="inputs_anndata"/> | 113 <expand macro="inputs_anndata"/> |
122 <conditional name="method"> | 114 <conditional name="method"> |
123 <param argument="method" type="select" label="Method used for filtering"> | 115 <param argument="method" type="select" label="Method used for filtering"> |
124 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`</option> | 116 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`</option> |
125 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using `pp.filter_genes`</option> | 117 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using `pp.filter_genes`</option> |
126 <option value="pp.filter_genes_dispersion">Extract highly variable genes, using `pp.filter_genes_dispersion`</option> | 118 <option value="tl.filter_rank_genes_groups">Filter out genes based on fold change and fraction of cells expressing the gene within and outside the groupby categories, using `tl.filter_rank_genes_groups`</option> |
127 <!--<option value="pp.highly_variable_genes">, using `tl.highly_variable_genes`</option>!--> | 119 <option value="pp.highly_variable_genes">Extract highly variable genes, using `pp.highly_variable_genes`</option> |
128 <option value="pp.subsample">Subsample to a fraction of the number of observations, using `pp.subsample`</option> | 120 <option value="pp.subsample">Subsample to a fraction of the number of observations, using `pp.subsample`</option> |
129 <!--<option value="queries.gene_coordinates">, using `queries.gene_coordinates`</option>!--> | 121 <option value="pp.downsample_counts">Downsample counts from count matrix, using `pp.downsample_counts`</option> |
130 <!--<option value="queries.mitochondrial_genes">, using `queries.mitochondrial_genes`</option>!--> | |
131 </param> | 122 </param> |
132 <when value="pp.filter_cells"> | 123 <when value="pp.filter_cells"> |
133 <conditional name="filter"> | 124 <conditional name="filter"> |
134 <param argument="filter" type="select" label="Filter"> | 125 <param argument="filter" type="select" label="Filter"> |
135 <option value="min_counts">Minimum number of counts</option> | 126 <option value="min_counts">Minimum number of counts</option> |
158 <option value="max_counts">Maximum number of counts</option> | 149 <option value="max_counts">Maximum number of counts</option> |
159 <option value="min_cells">Minimum number of cells expressed</option> | 150 <option value="min_cells">Minimum number of cells expressed</option> |
160 <option value="max_cells">Maximum number of cells expressed</option> | 151 <option value="max_cells">Maximum number of cells expressed</option> |
161 </param> | 152 </param> |
162 <when value="min_counts"> | 153 <when value="min_counts"> |
163 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering" help=""/> | 154 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/> |
164 </when> | 155 </when> |
165 <when value="max_counts"> | 156 <when value="max_counts"> |
166 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering" help=""/> | 157 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/> |
167 </when> | 158 </when> |
168 <when value="min_cells"> | 159 <when value="min_cells"> |
169 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering" help=""/> | 160 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/> |
170 </when> | 161 </when> |
171 <when value="max_cells"> | 162 <when value="max_cells"> |
172 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering" help=""/> | 163 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/> |
173 </when> | 164 </when> |
174 </conditional> | 165 </conditional> |
175 </when> | 166 </when> |
176 <when value="pp.filter_genes_dispersion"> | 167 <when value="tl.filter_rank_genes_groups"> |
168 <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored"/> | |
169 <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider"/> | |
170 <expand macro="param_use_raw"/> | |
171 <expand macro="param_log"/> | |
172 <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values"/> | |
173 <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of cells expressing the gene within the categories"/> | |
174 <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of cells expressing the gene outside the categories"/> | |
175 <param argument="min_fold_change" type="integer" value="2" label="Minimum fold change"/> | |
176 </when> | |
177 <when value="pp.highly_variable_genes"> | |
177 <conditional name='flavor'> | 178 <conditional name='flavor'> |
178 <param argument="flavor" type="select" label="Flavor for computing normalized dispersion" help=""> | 179 <param argument="flavor" type="select" label="Flavor for computing normalized dispersion"> |
179 <option value="seurat">seurat: expects non-logarithmized data</option> | 180 <option value="seurat">seurat: expects non-logarithmized data</option> |
180 <option value="cell_ranger">cell_ranger: usually called for logarithmized data</option> | 181 <option value="cell_ranger">cell_ranger: usually called for logarithmized data</option> |
181 </param> | 182 </param> |
182 <when value="seurat"> | 183 <when value="seurat"> |
183 <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff" help=""/> | 184 <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff"/> |
184 <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff" help=""/> | 185 <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff"/> |
185 <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff" help=""/> | 186 <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff"/> |
186 <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff" help=""/> | 187 <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff"/> |
187 </when> | 188 </when> |
188 <when value="cell_ranger"> | 189 <when value="cell_ranger"> |
189 <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep" help=""/> | 190 <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep"/> |
190 </when> | 191 </when> |
191 </conditional> | 192 </conditional> |
192 <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/> | 193 <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/> |
193 <expand macro="param_log"/> | 194 <param argument="subset" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Inplace subset to highly-variable genes?" help="Otherwise it merely indicates highly variable genes."/> |
194 </when> | 195 </when> |
195 <when value="pp.subsample"> | 196 <when value="pp.subsample"> |
196 <conditional name="type"> | 197 <conditional name="type"> |
197 <param name="type" type="select" label="Type of subsampling"> | 198 <param name="type" type="select" label="Type of subsampling"> |
198 <option value="fraction">By fraction</option> | 199 <option value="fraction">By fraction</option> |
199 <option value="n_obs">By number of observations</option> | 200 <option value="n_obs">By number of observations</option> |
200 </param> | 201 </param> |
201 <when value="fraction"> | 202 <when value="fraction"> |
202 <param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations" help=""/> | 203 <param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations"/> |
203 </when> | 204 </when> |
204 <when value="n_obs"> | 205 <when value="n_obs"> |
205 <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations" help=""/> | 206 <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations"/> |
206 </when> | 207 </when> |
207 </conditional> | 208 </conditional> |
208 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/> | 209 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> |
210 </when> | |
211 <when value="pp.downsample_counts"> | |
212 <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than ‘counts_per_cell’, it will be downsampled to this number. Resulting counts can be specified on a per cell basis by passing an array."/> | |
213 <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/> | |
214 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> | |
215 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/> | |
209 </when> | 216 </when> |
210 </conditional> | 217 </conditional> |
211 <expand macro="anndata_modify_output_input"/> | |
212 </inputs> | 218 </inputs> |
213 <outputs> | 219 <outputs> |
214 <expand macro="anndata_modify_outputs"/> | 220 <expand macro="anndata_outputs"/> |
215 <!-- for pp.filter_cells --> | |
216 <data name="counts_per_cell" format="tabular" label="${tool.name} on ${on_string}: Counts per cells after filtering"> | |
217 <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter> | |
218 </data> | |
219 <data name="genes_per_cell" format="tabular" label="${tool.name} on ${on_string}: Number of genes per cell after filtering"> | |
220 <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_genes' or method['filter']['filter'] == 'max_genes')</filter> | |
221 </data> | |
222 <!-- for pp.filter_genes --> | |
223 <data name="counts_per_gene" format="tabular" label="${tool.name} on ${on_string}: Counts per genes after filtering"> | |
224 <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter> | |
225 </data> | |
226 <data name="cells_per_gene" format="tabular" label="${tool.name} on ${on_string}: Number of cells per genes after filtering"> | |
227 <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_cells' or method['filter']['filter'] == 'max_cells')</filter> | |
228 </data> | |
229 <!-- for pp.filter_genes_dispersion --> | |
230 <data name="per_gene" format="tabular" label="${tool.name} on ${on_string}: Means, dispersions and normalized dispersions per gene"> | |
231 <filter>method['method'] == 'pp.filter_genes_dispersion'</filter> | |
232 </data> | |
233 </outputs> | 221 </outputs> |
234 <tests> | 222 <tests> |
235 <test expect_num_outputs="2"> | 223 <test> |
236 <conditional name="input"> | 224 <!-- test 1 --> |
237 <param name="format" value="h5ad" /> | 225 <param name="adata" value="krumsiek11.h5ad" /> |
238 <param name="adata" value="krumsiek11.h5ad" /> | |
239 </conditional> | |
240 <conditional name="method"> | 226 <conditional name="method"> |
241 <param name="method" value="pp.filter_cells"/> | 227 <param name="method" value="pp.filter_cells"/> |
242 <conditional name="filter"> | 228 <conditional name="filter"> |
243 <param name="filter" value="min_counts"/> | 229 <param name="filter" value="min_counts"/> |
244 <param name="min_counts" value="3"/> | 230 <param name="min_counts" value="3"/> |
245 </conditional> | 231 </conditional> |
246 </conditional> | 232 </conditional> |
247 <conditional name="modify_anndata"> | |
248 <param name="modify_anndata" value="true"/> | |
249 <param name="anndata_output_format" value="h5ad" /> | |
250 </conditional> | |
251 <assert_stdout> | 233 <assert_stdout> |
252 <has_text_matching expression="sc.pp.filter_cells"/> | 234 <has_text_matching expression="sc.pp.filter_cells"/> |
253 <has_text_matching expression="min_counts=3"/> | 235 <has_text_matching expression="min_counts=3"/> |
254 </assert_stdout> | 236 </assert_stdout> |
255 <output name="anndata_out_h5ad" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> | 237 <output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> |
256 <output name="counts_per_cell"> | 238 </test> |
257 <assert_contents> | 239 <test> |
258 <has_text_matching expression="cell_type\tn_counts" /> | 240 <!-- test 2 --> |
259 <has_text_matching expression="46\tprogenitor\t3.028" /> | 241 <param name="adata" value="krumsiek11.h5ad" /> |
260 <has_text_matching expression="85\tEry\t3.7001" /> | |
261 <has_text_matching expression="150\tMk\t4.095" /> | |
262 <has_n_columns n="3" /> | |
263 </assert_contents> | |
264 </output> | |
265 </test> | |
266 <test expect_num_outputs="2"> | |
267 <conditional name="input"> | |
268 <param name="format" value="loom" /> | |
269 <param name="adata" value="krumsiek11.loom" /> | |
270 <param name="sparse" value="True"/> | |
271 <param name="cleanup" value="False"/> | |
272 <param name="x_name" value="spliced"/> | |
273 <param name="obs_names" value="CellID" /> | |
274 <param name="var_names" value="Gene"/> | |
275 </conditional> | |
276 <conditional name="method"> | |
277 <param name="method" value="pp.filter_cells"/> | |
278 <conditional name="filter"> | |
279 <param name="filter" value="min_counts"/> | |
280 <param name="min_counts" value="3"/> | |
281 </conditional> | |
282 </conditional> | |
283 <conditional name="modify_anndata"> | |
284 <param name="modify_anndata" value="true"/> | |
285 <param name="anndata_output_format" value="loom" /> | |
286 </conditional> | |
287 <assert_stdout> | |
288 <has_text_matching expression="sc.pp.filter_cells"/> | |
289 <has_text_matching expression="min_counts=3"/> | |
290 </assert_stdout> | |
291 <output name="anndata_out_loom" file="pp.filter_cells.krumsiek11-min_counts.loom" ftype="loom" compare="sim_size"/> | |
292 <output name="counts_per_cell"> | |
293 <assert_contents> | |
294 <has_text_matching expression="cell_type\tn_counts" /> | |
295 <has_text_matching expression="46\tprogenitor\t3.028" /> | |
296 <has_text_matching expression="85\tEry\t3.7001" /> | |
297 <has_text_matching expression="97\tMo\t3.925" /> | |
298 <has_text_matching expression="150\tMk\t4.095" /> | |
299 <has_n_columns n="3" /> | |
300 </assert_contents> | |
301 </output> | |
302 </test> | |
303 <test expect_num_outputs="1"> | |
304 <conditional name="input"> | |
305 <param name="format" value="h5ad" /> | |
306 <param name="adata" value="krumsiek11.h5ad"/> | |
307 </conditional> | |
308 <conditional name="method"> | 242 <conditional name="method"> |
309 <param name="method" value="pp.filter_cells"/> | 243 <param name="method" value="pp.filter_cells"/> |
310 <conditional name="filter"> | 244 <conditional name="filter"> |
311 <param name="filter" value="max_genes"/> | 245 <param name="filter" value="max_genes"/> |
312 <param name="max_genes" value="100"/> | 246 <param name="max_genes" value="100"/> |
313 </conditional> | 247 </conditional> |
314 </conditional> | 248 </conditional> |
315 <conditional name="modify_anndata"> | |
316 <param name="modify_anndata" value="false"/> | |
317 </conditional> | |
318 <assert_stdout> | 249 <assert_stdout> |
319 <has_text_matching expression="sc.pp.filter_cells"/> | 250 <has_text_matching expression="sc.pp.filter_cells"/> |
320 <has_text_matching expression="adata.X"/> | 251 <has_text_matching expression="adata"/> |
321 <has_text_matching expression="max_genes=100"/> | 252 <has_text_matching expression="max_genes=100"/> |
322 </assert_stdout> | 253 </assert_stdout> |
323 <output name="genes_per_cell" file="pp.filter_cells.number_per_cell.krumsiek11-max_genes.tabular"/> | 254 <output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/> |
324 </test> | 255 </test> |
325 <test expect_num_outputs="2"> | 256 <test> |
326 <conditional name="input"> | 257 <!-- test 3 --> |
327 <param name="format" value="h5ad" /> | 258 <param name="adata" value="krumsiek11.h5ad" /> |
328 <param name="adata" value="krumsiek11.h5ad" /> | |
329 </conditional> | |
330 <conditional name="method"> | 259 <conditional name="method"> |
331 <param name="method" value="pp.filter_genes"/> | 260 <param name="method" value="pp.filter_genes"/> |
332 <conditional name="filter"> | 261 <conditional name="filter"> |
333 <param name="filter" value="min_counts"/> | 262 <param name="filter" value="min_counts"/> |
334 <param name="min_counts" value="3"/> | 263 <param name="min_counts" value="3"/> |
335 </conditional> | 264 </conditional> |
336 </conditional> | 265 </conditional> |
337 <conditional name="modify_anndata"> | |
338 <param name="modify_anndata" value="true"/> | |
339 <param name="anndata_output_format" value="h5ad" /> | |
340 </conditional> | |
341 <assert_stdout> | 266 <assert_stdout> |
342 <has_text_matching expression="sc.pp.filter_genes"/> | 267 <has_text_matching expression="sc.pp.filter_genes"/> |
343 <has_text_matching expression="min_counts=3"/> | 268 <has_text_matching expression="min_counts=3"/> |
344 </assert_stdout> | 269 </assert_stdout> |
345 <output name="anndata_out_h5ad" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> | 270 <output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> |
346 <output name="counts_per_gene" file="pp.filter_genes.number_per_gene.krumsiek11-min_counts.tabular"/> | 271 </test> |
347 </test> | 272 <test> |
348 <test expect_num_outputs="1"> | 273 <!-- test 4 --> |
349 <conditional name="input"> | 274 <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" /> |
350 <param name="format" value="h5ad" /> | 275 <conditional name="method"> |
351 <param name="adata" value="pbmc68k_reduced.h5ad"/> | 276 <param name="method" value="tl.filter_rank_genes_groups"/> |
352 </conditional> | 277 <param name="key" value="rank_genes_groups"/> |
353 <conditional name="method"> | 278 <param name="use_raw" value="False"/> |
354 <param name="method" value="pp.filter_genes"/> | 279 <param name="log" value="False"/> |
355 <conditional name="filter"> | 280 <param name="key_added" value="rank_genes_groups_filtered"/> |
356 <param name="filter" value="max_cells"/> | 281 <param name="min_in_group_fraction" value="0.25"/> |
357 <param name="max_cells" value="500"/> | 282 <param name="max_out_group_fraction" value="0.5"/> |
358 </conditional> | 283 <param name="min_fold_change" value="3"/> |
359 </conditional> | 284 </conditional> |
360 <conditional name="modify_anndata"> | 285 <assert_stdout> |
361 <param name="modify_anndata" value="false"/> | 286 <has_text_matching expression="tl.filter_rank_genes_groups"/> |
362 </conditional> | 287 <has_text_matching expression="key='rank_genes_groups'"/> |
363 <assert_stdout> | 288 <has_text_matching expression="use_raw=False"/> |
364 <has_text_matching expression="sc.pp.filter_genes"/> | 289 <has_text_matching expression="log=False"/> |
365 <has_text_matching expression="adata.X"/> | 290 <has_text_matching expression="key_added='rank_genes_groups_filtered'"/> |
366 <has_text_matching expression="max_cells=500"/> | 291 <has_text_matching expression="min_in_group_fraction=0.25"/> |
367 </assert_stdout> | 292 <has_text_matching expression="max_out_group_fraction=0.5"/> |
368 <output name="cells_per_gene" file="pp.filter_genes.number_per_gene.pbmc68k_reduced-max_cells.tabular"/> | 293 <has_text_matching expression="min_fold_change=3"/> |
369 </test> | 294 </assert_stdout> |
370 <test expect_num_outputs="2"> | 295 <output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/> |
371 <conditional name="input"> | 296 </test> |
372 <param name="format" value="h5ad" /> | 297 <test> |
373 <param name="adata" value="krumsiek11.h5ad" /> | 298 <!-- test 5 --> |
374 </conditional> | 299 <param name="adata" value="blobs.h5ad"/> |
375 <conditional name="method"> | 300 <conditional name="method"> |
376 <param name="method" value="pp.filter_genes_dispersion"/> | 301 <param name="method" value="pp.highly_variable_genes"/> |
377 <conditional name="flavor"> | 302 <conditional name="flavor"> |
378 <param name="flavor" value="seurat"/> | 303 <param name="flavor" value="seurat"/> |
379 <param name="min_mean" value="0.0125"/> | 304 <param name="min_mean" value="0.0125"/> |
380 <param name="max_mean" value="3"/> | 305 <param name="max_mean" value="3"/> |
381 <param name="min_disp" value="0.5"/> | 306 <param name="min_disp" value="0.5"/> |
382 </conditional> | 307 </conditional> |
383 <param name="n_bins" value="20" /> | 308 <param name="n_bins" value="20"/> |
384 <param name="log" value="true"/> | 309 <param name="subset" value="false"/> |
385 </conditional> | 310 </conditional> |
386 <conditional name="modify_anndata"> | 311 <assert_stdout> |
387 <param name="modify_anndata" value="true"/> | 312 <has_text_matching expression="sc.pp.highly_variable_genes"/> |
388 <param name="anndata_output_format" value="h5ad" /> | |
389 </conditional> | |
390 <assert_stdout> | |
391 <has_text_matching expression="sc.pp.filter_genes_dispersion"/> | |
392 <has_text_matching expression="flavor='seurat'"/> | 313 <has_text_matching expression="flavor='seurat'"/> |
393 <has_text_matching expression="min_mean=0.0125"/> | 314 <has_text_matching expression="min_mean=0.0125"/> |
394 <has_text_matching expression="max_mean=3.0"/> | 315 <has_text_matching expression="max_mean=3"/> |
395 <has_text_matching expression="min_disp=0.5"/> | 316 <has_text_matching expression="min_disp=0.5"/> |
396 <has_text_matching expression="n_bins=20"/> | 317 <has_text_matching expression="n_bins=20"/> |
397 <has_text_matching expression="log=True"/> | 318 <has_text_matching expression="subset=False"/> |
398 </assert_stdout> | 319 </assert_stdout> |
399 <output name="anndata_out_h5ad" file="pp.filter_genes_dispersion.krumsiek11-seurat.h5ad" ftype="h5" compare="sim_size"/> | 320 <output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size"/> |
400 <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-seurat.tabular"/> | 321 </test> |
401 </test> | 322 <test> |
402 <test expect_num_outputs="1"> | 323 <!-- test 6 --> |
403 <conditional name="input"> | 324 <param name="adata" value="krumsiek11.h5ad" /> |
404 <param name="format" value="h5ad" /> | 325 <conditional name="method"> |
405 <param name="adata" value="krumsiek11.h5ad" /> | 326 <param name="method" value="pp.highly_variable_genes"/> |
406 </conditional> | |
407 <conditional name="method"> | |
408 <param name="method" value="pp.filter_genes_dispersion"/> | |
409 <conditional name="flavor"> | 327 <conditional name="flavor"> |
410 <param name="flavor" value="cell_ranger"/> | 328 <param name="flavor" value="cell_ranger"/> |
411 <param name="n_top_genes" value="2"/> | 329 <param name="n_top_genes" value="2"/> |
412 </conditional> | 330 </conditional> |
413 <param name="n_bins" value="20"/> | 331 <param name="n_bins" value="20"/> |
414 <param name="log" value="true"/> | 332 </conditional> |
415 </conditional> | 333 <assert_stdout> |
416 <conditional name="modify_anndata"> | 334 <has_text_matching expression="sc.pp.highly_variable_genes"/> |
417 <param name="modify_anndata" value="false"/> | |
418 </conditional> | |
419 <assert_stdout> | |
420 <has_text_matching expression="sc.pp.filter_genes_dispersion"/> | |
421 <has_text_matching expression="flavor='cell_ranger'"/> | 335 <has_text_matching expression="flavor='cell_ranger'"/> |
422 <has_text_matching expression="n_top_genes=2"/> | 336 <has_text_matching expression="n_top_genes=2"/> |
423 <has_text_matching expression="n_bins=20"/> | 337 <has_text_matching expression="n_bins=20"/> |
424 <has_text_matching expression="og=True"/> | 338 </assert_stdout> |
425 </assert_stdout> | 339 <output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size"/> |
426 <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-cell_ranger.tabular"/> | 340 </test> |
427 </test> | 341 <test> |
428 <test expect_num_outputs="1"> | 342 <!-- test 7 --> |
429 <conditional name="input"> | 343 <param name="adata" value="krumsiek11.h5ad" /> |
430 <param name="format" value="h5ad" /> | |
431 <param name="adata" value="krumsiek11.h5ad" /> | |
432 </conditional> | |
433 <conditional name="method"> | 344 <conditional name="method"> |
434 <param name="method" value="pp.subsample"/> | 345 <param name="method" value="pp.subsample"/> |
435 <conditional name="type"> | 346 <conditional name="type"> |
436 <param name="type" value="fraction" /> | 347 <param name="type" value="fraction" /> |
437 <param name="fraction" value="0.5"/> | 348 <param name="fraction" value="0.5"/> |
438 </conditional> | 349 </conditional> |
439 <param name="random_state" value="0"/> | 350 <param name="random_state" value="0"/> |
440 </conditional> | 351 </conditional> |
441 <conditional name="modify_anndata"> | |
442 <param name="modify_anndata" value="true"/> | |
443 <param name="anndata_output_format" value="h5ad" /> | |
444 </conditional> | |
445 <assert_stdout> | 352 <assert_stdout> |
446 <has_text_matching expression="sc.pp.subsample"/> | 353 <has_text_matching expression="sc.pp.subsample"/> |
447 <has_text_matching expression="fraction=0.5"/> | 354 <has_text_matching expression="fraction=0.5"/> |
448 <has_text_matching expression="random_state=0"/> | 355 <has_text_matching expression="random_state=0"/> |
449 </assert_stdout> | 356 </assert_stdout> |
450 <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5" compare="sim_size"/> | 357 <output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/> |
451 </test> | 358 </test> |
452 <test expect_num_outputs="1"> | 359 <test> |
453 <conditional name="input"> | 360 <!-- test 8 --> |
454 <param name="format" value="h5ad" /> | 361 <param name="adata" value="krumsiek11.h5ad" /> |
455 <param name="adata" value="krumsiek11.h5ad" /> | |
456 </conditional> | |
457 <conditional name="method"> | 362 <conditional name="method"> |
458 <param name="method" value="pp.subsample"/> | 363 <param name="method" value="pp.subsample"/> |
459 <conditional name="type"> | 364 <conditional name="type"> |
460 <param name="type" value="n_obs" /> | 365 <param name="type" value="n_obs" /> |
461 <param name="n_obs" value="10"/> | 366 <param name="n_obs" value="10"/> |
462 </conditional> | 367 </conditional> |
463 <param name="random_state" value="0"/> | 368 <param name="random_state" value="0"/> |
464 </conditional> | 369 </conditional> |
465 <conditional name="modify_anndata"> | |
466 <param name="modify_anndata" value="true"/> | |
467 <param name="anndata_output_format" value="h5ad" /> | |
468 </conditional> | |
469 <assert_stdout> | 370 <assert_stdout> |
470 <has_text_matching expression="sc.pp.subsample"/> | 371 <has_text_matching expression="sc.pp.subsample"/> |
471 <has_text_matching expression="n_obs=10"/> | 372 <has_text_matching expression="n_obs=10"/> |
472 <has_text_matching expression="random_state=0"/> | 373 <has_text_matching expression="random_state=0"/> |
473 </assert_stdout> | 374 </assert_stdout> |
474 <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5" compare="sim_size"/> | 375 <output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/> |
376 </test> | |
377 <test> | |
378 <!-- test 9 --> | |
379 <param name="adata" value="random-randint.h5ad" /> | |
380 <conditional name="method"> | |
381 <param name="method" value="pp.downsample_counts"/> | |
382 <param name="total_counts" value="20000"/> | |
383 <param name="random_state" value="0"/> | |
384 <param name="replace" value="false"/> | |
385 </conditional> | |
386 <assert_stdout> | |
387 <has_text_matching expression="sc.pp.downsample_counts"/> | |
388 <has_text_matching expression="total_counts=20000"/> | |
389 <has_text_matching expression="random_state=0"/> | |
390 <has_text_matching expression="replace=False"/> | |
391 </assert_stdout> | |
392 <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size"/> | |
475 </test> | 393 </test> |
476 </tests> | 394 </tests> |
477 <help><![CDATA[ | 395 <help><![CDATA[ |
478 | 396 |
479 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`) | 397 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`) |
485 | 403 |
486 Only provide one of the optional parameters `min_counts`, `min_genes`, | 404 Only provide one of the optional parameters `min_counts`, `min_genes`, |
487 `max_counts`, `max_genes` per call. | 405 `max_counts`, `max_genes` per call. |
488 | 406 |
489 More details on the `scanpy documentation | 407 More details on the `scanpy documentation |
490 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_cells.html#scanpy.api.pp.filter_cells>`__ | 408 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.filter_cells.html>`__ |
491 | |
492 Return | |
493 ------ | |
494 | |
495 number_per_cell : Number per cell (either `n_counts` or `n_genes` per cell) | |
496 | 409 |
497 | 410 |
498 Filter genes based on number of cells or counts (`pp.filter_genes`) | 411 Filter genes based on number of cells or counts (`pp.filter_genes`) |
499 =================================================================== | 412 =================================================================== |
500 | 413 |
504 | 417 |
505 Only provide one of the optional parameters `min_counts`, `min_cells`, | 418 Only provide one of the optional parameters `min_counts`, `min_cells`, |
506 `max_counts`, `max_cells` per call. | 419 `max_counts`, `max_cells` per call. |
507 | 420 |
508 More details on the `scanpy documentation | 421 More details on the `scanpy documentation |
509 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes.html#scanpy.api.pp.filter_genes>`__ | 422 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.filter_genes.html>`__ |
510 | 423 |
511 Return | 424 |
512 ------ | 425 Filters out genes based on fold change and fraction of cells expressing the gene within and outside the groupby categories (`tl.filter_rank_genes_groups`) |
513 | 426 ========================================================================================================================================================== |
514 number_per_gene : Number per genes (either `n_counts` or `n_genes` per cell) | |
515 | |
516 | |
517 Extract highly variable genes (`pp.filter_genes_dispersion`) | |
518 ============================================================ | |
519 | |
520 If trying out parameters, pass the data matrix instead of AnnData. | |
521 | |
522 Depending on `flavor`, this reproduces the R-implementations of Seurat and Cell Ranger. | |
523 | |
524 The normalized dispersion is obtained by scaling with the mean and standard | |
525 deviation of the dispersions for genes falling into a given bin for mean | |
526 expression of genes. This means that for each bin of mean expression, highly | |
527 variable genes are selected. | |
528 | |
529 Use `flavor='cell_ranger'` with care and in the same way as in `pp.recipe_zheng17`. | |
530 | 427 |
531 More details on the `scanpy documentation | 428 More details on the `scanpy documentation |
532 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes_dispersion.html#scanpy.api.pp.filter_genes_dispersion>`__ | 429 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.filter_rank_genes_groups.html>`__ |
533 | 430 |
534 Returns | 431 |
535 ------- | 432 Annotate highly variable genes (`pp.highly_variable_genes`) |
536 - The annotated matrix filtered, with the annotations | 433 =========================================================== |
537 - A table with the means, dispersions, and normalized dispersions per gene, logarithmized when `log` is `True`. | 434 |
435 It expects logarithmized data. | |
436 | |
437 Depending on `flavor`, this reproduces the R-implementations of Seurat or Cell Ranger. The normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected. | |
538 | 438 |
539 | 439 |
540 Subsample to a fraction of the number of observations (`pp.subsample`) | 440 Subsample to a fraction of the number of observations (`pp.subsample`) |
541 ====================================================================== | 441 ====================================================================== |
542 | 442 |
543 More details on the `scanpy documentation | 443 More details on the `scanpy documentation |
544 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.subsample.html#scanpy.api.pp.subsample>`__ | 444 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.subsample.html>`__ |
445 | |
446 Downsample counts (`pp.downsample_counts`) | |
447 ========================================== | |
448 | |
449 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This | |
450 has been implemented by M. D. Luecken. | |
451 | |
452 More details on the `scanpy documentation | |
453 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.downsample_counts.html>`__ | |
545 | 454 |
546 | 455 |
547 ]]></help> | 456 ]]></help> |
548 <expand macro="citations"/> | 457 <expand macro="citations"/> |
549 </tool> | 458 </tool> |