Mercurial > repos > iuc > scanpy_normalize
comparison normalize.xml @ 0:ed64c90a9b93 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author | iuc |
---|---|
date | Mon, 04 Mar 2019 10:16:12 -0500 |
parents | |
children | a9f14e2d1655 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ed64c90a9b93 |
---|---|
1 <tool id="scanpy_normalize" name="Normalize with scanpy" version="@galaxy_version@"> | |
2 <description></description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"/> | |
7 <expand macro="version_command"/> | |
8 <!-- The whole command line comes from the @CMD@ token, which is not defined in this file (presumably in the imported macros.xml; confirm). --> <command detect_errors="exit_code"><![CDATA[ | |
9 @CMD@ | |
10 ]]></command> | |
11 <!-- Renders the Python script run by the tool: one scanpy call per selected method. Optional numeric parameters are tested with str(...) != '' so that an explicit 0 is still forwarded to scanpy instead of being treated as unset. --> <configfiles> | |
12 <configfile name="script_file"><![CDATA[ | |
13 @CMD_imports@ | |
14 @CMD_read_inputs@ | |
15 | |
16 #if $method.method == "pp.normalize_per_cell" | |
17 sc.pp.normalize_per_cell( | |
18 data=adata, | |
19 #if str($method.counts_per_cell_after) != '' | |
20 counts_per_cell_after=$method.counts_per_cell_after, | |
21 #end if | |
22 #if $method.counts_per_cell | |
23 counts_per_cell=np.loadtxt('$method.counts_per_cell'), | |
24 #end if | |
25 key_n_counts='$method.key_n_counts', | |
26 copy=False) | |
27 adata.obs.to_csv('$anndata_obs', sep='\t') | |
28 #elif $method.method == "pp.recipe_zheng17" | |
29 sc.pp.recipe_zheng17( | |
30 adata=adata, | |
31 n_top_genes=$method.n_top_genes, | |
32 log=$method.log, | |
33 plot=False, | |
34 copy=False) | |
35 #elif $method.method == "pp.recipe_weinreb17" | |
36 sc.pp.recipe_weinreb17( | |
37 adata=adata, | |
38 log=$method.log, | |
39 mean_threshold=$method.mean_threshold, | |
40 cv_threshold=$method.cv_threshold, | |
41 n_pcs=$method.n_pcs, | |
42 svd_solver='$method.svd_solver', | |
43 random_state=$method.random_state, | |
44 copy=False) | |
45 #elif $method.method == "pp.recipe_seurat" | |
46 sc.pp.recipe_seurat( | |
47 adata=adata, | |
48 log=$method.log, | |
49 plot=False, | |
50 copy=False) | |
51 #elif $method.method == "pp.log1p" | |
52 sc.pp.log1p( | |
53 data=adata, | |
54 copy=False) | |
55 #elif $method.method == "pp.scale" | |
56 sc.pp.scale( | |
57 data=adata, | |
58 zero_center=$method.zero_center, | |
59 #if str($method.max_value) != '' | |
60 max_value=$method.max_value, | |
61 #end if | |
62 copy=False) | |
63 #elif $method.method == "pp.sqrt" | |
64 sc.pp.sqrt( | |
65 data=adata, | |
66 copy=False) | |
67 #elif $method.method == "pp.downsample_counts" | |
68 sc.pp.downsample_counts( | |
69 adata=adata, | |
70 target_counts=$method.target_counts, | |
71 random_state=$method.random_state, | |
72 copy=False) | |
73 #end if | |
74 | |
75 @CMD_anndata_write_outputs@ | |
76 | |
77 ]]></configfile> | |
78 </configfiles> | |
79 <inputs> | |
80 <expand macro="inputs_anndata"/> | |
81 <!-- One <when> branch per scanpy preprocessing function; each option value is compared verbatim against $method.method in the script configfile, so only the display text may be reworded. --> <conditional name="method"> | |
82 <param argument="method" type="select" label="Method used for normalization"> | |
83 <option value="pp.normalize_per_cell">Normalize total counts per cell, using `pp.normalize_per_cell`</option> | |
84 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using `pp.recipe_zheng17`</option> | |
85 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al. (2017), using `pp.recipe_weinreb17`</option> | |
86 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat (Satija et al., 2015), using `pp.recipe_seurat`</option> | |
87 <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option> | |
88 <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option> | |
89 <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option> | |
90 <option value="pp.downsample_counts">Downsample counts, using `pp.downsample_counts`</option> | |
91 </param> | |
92 <when value="pp.normalize_per_cell"> | |
93 <param argument="counts_per_cell_after" type="float" value="" optional="true" label="Counts per cell after" help="If not provided, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization."/> | |
94 <param argument="counts_per_cell" type="data" format="tabular,txt" optional="true" label="Precomputed counts per cell" help=""/> | |
95 <param argument="key_n_counts" type="text" value="n_counts" label="Name of the field in `adata.obs` where the total counts per cell will be stored" help=""/> | |
96 </when> | |
97 <when value="pp.recipe_zheng17"> | |
98 <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/> | |
99 <expand macro="param_log"/> | |
100 </when> | |
101 <when value="pp.recipe_weinreb17"> | |
102 <expand macro="param_log"/> | |
103 <param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/> | |
104 <param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/> | |
105 <param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal components" help=""/> | |
106 <expand macro="svd_solver"/> | |
107 <expand macro="pca_random_state"/> | |
108 </when> | |
109 <when value="pp.recipe_seurat"> | |
110 <expand macro="param_log"/> | |
111 </when> | |
112 <when value="pp.log1p"/> | |
113 <when value="pp.scale"> | |
114 <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" | |
115 label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/> | |
116 <param argument="max_value" type="float" value="" optional="true" label="Maximum value" | |
117 help="Clip (truncate) to this value after scaling. If not set, it does not clip."/> | |
118 </when> | |
119 <when value="pp.sqrt"/> | |
120 <when value="pp.downsample_counts"> | |
121 <param argument="target_counts" type="integer" min="0" value="20000" | |
122 label="Target number of counts for downsampling" help="Cells with more counts than 'target_counts' will be downsampled to have 'target_counts' counts."/> | |
123 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/> | |
124 </when> | |
125 </conditional> | |
126 <expand macro="anndata_output_format"/> | |
127 </inputs> | |
128 <outputs> | |
129 <expand macro="anndata_outputs"/> | |
130 <!-- Only produced for pp.normalize_per_cell, the one branch of the script that writes adata.obs to $anndata_obs. --> <data name="anndata_obs" format="tabular" label="${tool.name} on ${on_string}: Annotation of observations"> | |
131 <filter>method['method'] == 'pp.normalize_per_cell'</filter> | |
132 </data> | |
133 </outputs> | |
134 <tests> | |
135 <test> <!-- pp.normalize_per_cell with a fixed counts_per_cell_after and a precomputed counts_per_cell file; also compares the adata.obs table output. --> | |
136 <conditional name="input"> | |
137 <param name="format" value="h5ad" /> | |
138 <param name="adata" value="krumsiek11.h5ad" /> | |
139 </conditional> | |
140 <conditional name="method"> | |
141 <param name="method" value="pp.normalize_per_cell"/> | |
142 <param name="counts_per_cell_after" value="2"/> | |
143 <param name="counts_per_cell" value="krumsiek11_counts_per_cell"/> | |
144 <param name="key_n_counts" value="n_counts"/> | |
145 </conditional> | |
146 <param name="anndata_output_format" value="h5ad"/> | |
147 <assert_stdout> | |
148 <has_text_matching expression="sc.pp.normalize_per_cell"/> | |
149 <has_text_matching expression="counts_per_cell_after=2.0"/> | |
150 <has_text_matching expression="counts_per_cell=np.loadtxt"/> | |
151 <has_text_matching expression="key_n_counts='n_counts'"/> | |
152 </assert_stdout> | |
153 <output name="anndata_out_h5ad" file="pp.normalize_per_cell.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
154 <output name="anndata_obs" file="pp.normalize_per_cell.obs.krumsiek11.tabular"/> | |
155 </test> | |
156 <test> <!-- pp.recipe_zheng17 keeping 1000 genes with log=True; the output is compared by size only (sim_size). --> | |
157 <conditional name="input"> | |
158 <param name="format" value="h5ad" /> | |
159 <param name="adata" value="random-randint.h5ad"/> | |
160 </conditional> | |
161 <conditional name="method"> | |
162 <param name="method" value="pp.recipe_zheng17"/> | |
163 <param name="n_top_genes" value="1000"/> | |
164 <param name="log" value="True"/> | |
165 </conditional> | |
166 <param name="anndata_output_format" value="h5ad"/> | |
167 <assert_stdout> | |
168 <has_text_matching expression="sc.pp.recipe_zheng17"/> | |
169 <has_text_matching expression="n_top_genes=1000"/> | |
170 <has_text_matching expression="log=True"/> | |
171 </assert_stdout> | |
172 <output name="anndata_out_h5ad" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5" compare="sim_size"/> | |
173 </test> | |
174 <test> <!-- pp.recipe_weinreb17 with every parameter of the branch set explicitly; stdout must echo each rendered argument. --> | |
175 <conditional name="input"> | |
176 <param name="format" value="h5ad" /> | |
177 <param name="adata" value="paul15_subsample.h5ad" /> | |
178 </conditional> | |
179 <conditional name="method"> | |
180 <param name="method" value="pp.recipe_weinreb17"/> | |
181 <param name="log" value="True"/> | |
182 <param name="mean_threshold" value="0.01"/> | |
183 <param name="cv_threshold" value="2.0"/> | |
184 <param name="n_pcs" value="50"/> | |
185 <param name="svd_solver" value="randomized"/> | |
186 <param name="random_state" value="0"/> | |
187 </conditional> | |
188 <param name="anndata_output_format" value="h5ad" /> | |
189 <assert_stdout> | |
190 <has_text_matching expression="sc.pp.recipe_weinreb17"/> | |
191 <has_text_matching expression="log=True"/> | |
192 <has_text_matching expression="mean_threshold=0.01"/> | |
193 <has_text_matching expression="cv_threshold=2.0"/> | |
194 <has_text_matching expression="n_pcs=50"/> | |
195 <has_text_matching expression="svd_solver='randomized'"/> | |
196 <has_text_matching expression="random_state=0"/> | |
197 </assert_stdout> | |
198 <output name="anndata_out_h5ad" file="pp.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/> | |
199 </test> | |
200 <test> <!-- pp.recipe_seurat with log=True. NOTE(review): the input file is the expected output of the pp.recipe_zheng17 test; confirm that feeding already-processed data here is intended. --> | |
201 <conditional name="input"> | |
202 <param name="format" value="h5ad" /> | |
203 <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> | |
204 </conditional> | |
205 <conditional name="method"> | |
206 <param name="method" value="pp.recipe_seurat"/> | |
207 <param name="log" value="True"/> | |
208 </conditional> | |
209 <param name="anndata_output_format" value="h5ad"/> | |
210 <assert_stdout> | |
211 <has_text_matching expression="sc.pp.recipe_seurat"/> | |
212 <has_text_matching expression="log=True"/> | |
213 </assert_stdout> | |
214 <output name="anndata_out_h5ad" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5" compare="sim_size"/> | |
215 </test> | |
216 <test> <!-- pp.log1p, which has no parameters of its own; only the call itself is checked in stdout. --> | |
217 <conditional name="input"> | |
218 <param name="format" value="h5ad" /> | |
219 <param name="adata" value="krumsiek11.h5ad" /> | |
220 </conditional> | |
221 <conditional name="method"> | |
222 <param name="method" value="pp.log1p"/> | |
223 </conditional> | |
224 <param name="anndata_output_format" value="h5ad" /> | |
225 <assert_stdout> | |
226 <has_text_matching expression="sc.pp.log1p"/> | |
227 </assert_stdout> | |
228 <output name="anndata_out_h5ad" file="pp.log1p.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
229 </test> | |
230 <test> <!-- pp.scale with zero centering and max_value left unset (no clipping argument is supplied by the test). --> | |
231 <conditional name="input"> | |
232 <param name="format" value="h5ad" /> | |
233 <param name="adata" value="krumsiek11.h5ad" /> | |
234 </conditional> | |
235 <conditional name="method"> | |
236 <param name="method" value="pp.scale"/> | |
237 <param name="zero_center" value="true"/> | |
238 </conditional> | |
239 <param name="anndata_output_format" value="h5ad" /> | |
240 <assert_stdout> | |
241 <has_text_matching expression="sc.pp.scale"/> | |
242 <has_text_matching expression="zero_center=True"/> | |
243 </assert_stdout> | |
244 <output name="anndata_out_h5ad" file="pp.scale.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
245 </test> | |
246 <test> <!-- pp.scale with max_value=10; the integer test value must be rendered as the float 10.0 in the script. --> | |
247 <conditional name="input"> | |
248 <param name="format" value="h5ad" /> | |
249 <param name="adata" value="krumsiek11.h5ad" /> | |
250 </conditional> | |
251 <conditional name="method"> | |
252 <param name="method" value="pp.scale"/> | |
253 <param name="zero_center" value="true"/> | |
254 <param name="max_value" value="10"/> | |
255 </conditional> | |
256 <param name="anndata_output_format" value="h5ad" /> | |
257 <assert_stdout> | |
258 <has_text_matching expression="sc.pp.scale"/> | |
259 <has_text_matching expression="zero_center=True"/> | |
260 <has_text_matching expression="max_value=10.0"/> | |
261 </assert_stdout> | |
262 <output name="anndata_out_h5ad" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
263 </test> | |
264 <test> <!-- pp.sqrt, which has no parameters of its own; only the call itself is checked in stdout. --> | |
265 <conditional name="input"> | |
266 <param name="format" value="h5ad" /> | |
267 <param name="adata" value="krumsiek11.h5ad" /> | |
268 </conditional> | |
269 <conditional name="method"> | |
270 <param name="method" value="pp.sqrt"/> | |
271 </conditional> | |
272 <param name="anndata_output_format" value="h5ad" /> | |
273 <assert_stdout> | |
274 <has_text_matching expression="sc.pp.sqrt"/> | |
275 </assert_stdout> | |
276 <output name="anndata_out_h5ad" file="pp.sqrt.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
277 </test> | |
278 <test> <!-- pp.downsample_counts with a fixed seed; unlike the other tests the output is not compared with a reference file, only checked for the X, obs and var keys. --> | |
279 <conditional name="input"> | |
280 <param name="format" value="h5ad" /> | |
281 <param name="adata" value="random-randint.h5ad" /> | |
282 </conditional> | |
283 <conditional name="method"> | |
284 <param name="method" value="pp.downsample_counts"/> | |
285 <param name="target_counts" value="20000"/> | |
286 <param name="random_state" value="0"/> | |
287 </conditional> | |
288 <param name="anndata_output_format" value="h5ad" /> | |
289 <assert_stdout> | |
290 <has_text_matching expression="sc.pp.downsample_counts"/> | |
291 <has_text_matching expression="target_counts=20000"/> | |
292 <has_text_matching expression="random_state=0"/> | |
293 </assert_stdout> | |
294 <output name="anndata_out_h5ad" ftype="h5"> | |
295 <assert_contents> | |
296 <has_h5_keys keys="X, obs, var" /> | |
297 </assert_contents> | |
298 </output> | |
299 </test> | |
300 </tests> | |
301 <help><![CDATA[ | |
302 Normalize total counts per cell (`pp.normalize_per_cell`) | |
303 ========================================================= | |
304 | |
305 Normalize each cell by total counts over all genes, so that every cell has | |
306 the same total count after normalization. | |
307 | |
308 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. | |
309 | |
310 More details on the `scanpy documentation | |
311 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.normalize_per_cell.html>`__ | |
312 | |
313 | |
314 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) | |
315 ================================================================================================================== | |
316 | |
317 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. | |
318 | |
319 The recipe runs the following steps: | |
320 | |
321 - only consider genes with more than 1 count | |
322 - normalize with total UMI count per cell | |
323 - select highly-variable genes | |
324 - subset the genes | |
325 - renormalize after filtering | |
326 - log transform (if needed) | |
327 - scale to unit variance and shift to zero mean | |
328 | |
329 More details on the `scanpy documentation | |
330 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_zheng17.html>`__ | |
331 | |
332 | |
333 Normalization and filtering as of Weinreb et al. (2017) (`pp.recipe_weinreb17`) | |
334 =============================================================================== | |
335 | |
336 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. | |
337 | |
338 More details on the `scanpy documentation | |
339 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_weinreb17.html>`__ | |
340 | |
341 | |
342 Normalization and filtering as of Seurat (Satija et al., 2015) (`pp.recipe_seurat`) | |
343 =================================================================================== | |
344 | |
345 This uses a particular preprocessing. | |
346 | |
347 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. | |
348 | |
349 More details on the `scanpy documentation | |
350 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_seurat.html>`__ | |
351 | |
352 Logarithmize the data matrix (`pp.log1p`) | |
353 ========================================= | |
354 | |
355 More details on the `scanpy documentation | |
356 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.log1p.html>`__ | |
357 | |
358 Scale data to unit variance and zero mean (`pp.scale`) | |
359 ====================================================== | |
360 | |
361 More details on the `scanpy documentation | |
362 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.scale.html>`__ | |
363 | |
364 Compute the square root of the data matrix (`pp.sqrt`) | |
365 ====================================================== | |
366 | |
367 `X = sqrt(X)` | |
368 | |
369 Downsample counts (`pp.downsample_counts`) | |
370 ========================================== | |
371 | |
372 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This | |
373 has been implemented by M. D. Luecken. | |
374 | |
375 More details on the `scanpy documentation | |
376 <https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.downsample_counts.html>`__ | |
377 | |
378 ]]></help> | |
379 <expand macro="citations"/> | |
380 </tool> |