comparison normalize.xml @ 13:7b9fafe32c86 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 3974c0ff86bb1d6633281f29dfb4c605a74e1e6e
author iuc
date Thu, 14 Mar 2024 15:37:29 +0000
parents 94c19fb1281c
children d844935c906c
comparison
equal deleted inserted replaced
12:94c19fb1281c 13:7b9fafe32c86
1 <tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> 1 <tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
2 <description>with scanpy</description> 2 <description>and impute with scanpy</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="bio_tools"/> 6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
63 adata=adata, 63 adata=adata,
64 log=$method.log, 64 log=$method.log,
65 plot=False, 65 plot=False,
66 copy=False) 66 copy=False)
67 67
68 #else if $method.method == "external.pp.magic"
69 sc.external.pp.magic(
70 adata=adata,
71 name_list='$method.name_list',
72 knn=$method.knn,
73 ## An empty optional 'decay' is rendered as an explicit None: omitting the keyword would fall back to scanpy's default (decay=1) instead of disabling the alpha-decaying kernel as the parameter help states.
74 #set $magic_decay = str($method.decay) if str($method.decay) != '' else 'None'
75 decay=$magic_decay,
76 #if str($method.knn_max) != ''
77 knn_max=$method.knn_max,
78 #end if
79 #if $method.t == -1
80 t='auto',
81 #else
82 t=$method.t,
83 #end if
84 ## Same reasoning for 'n_pca': an empty field must become None (no PCA), not scanpy's default of 100 components.
85 #set $magic_n_pca = str($method.n_pca) if str($method.n_pca) != '' else 'None'
86 n_pca=$magic_n_pca,
87 solver='$method.solver',
88 knn_dist='$method.knn_dist',
89 random_state=$method.random_state,
90 copy=False)
68 #end if 91 #end if
69 92
70 @CMD_anndata_write_outputs@ 93 @CMD_anndata_write_outputs@
71 94
72 ]]></configfile> 95 ]]></configfile>
77 <param argument="method" type="select" label="Method used for normalization"> 100 <param argument="method" type="select" label="Method used for normalization">
78 <option value="pp.normalize_total">Normalize counts per cell, using 'pp.normalize_total'</option> 101 <option value="pp.normalize_total">Normalize counts per cell, using 'pp.normalize_total'</option>
79 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option> 102 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option>
80 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option> 103 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option>
81 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option> 104 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option>
105 <option value="external.pp.magic">Denoising using Markov Affinity-based Graph Imputation of Cells (MAGIC) API 'external.pp.magic'</option>
82 </param> 106 </param>
83 <when value="pp.normalize_total"> 107 <when value="pp.normalize_total">
84 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> 108 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
85 <conditional name="exclude_highly_expressed"> 109 <conditional name="exclude_highly_expressed">
86 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum"> 110 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum">
117 <expand macro="pca_random_state"/> 141 <expand macro="pca_random_state"/>
118 </when> 142 </when>
119 <when value="pp.recipe_seurat"> 143 <when value="pp.recipe_seurat">
120 <expand macro="param_log"/> 144 <expand macro="param_log"/>
121 </when> 145 </when>
146 <when value="external.pp.magic"> <!-- Inputs forwarded as keyword arguments to sc.external.pp.magic by the configfile template -->
147 <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory">
148 <option value="all_genes">All genes</option>
149 <option value="pca_only">PCA only</option>
150 </param>
151 <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/>
152 <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails"
153 help="If not set, alpha decaying kernel is not used" />
154 <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection"
155 help="If not set, will be set to 3 * knn" />
156 <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion"
157 help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." />
158 <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods"
159 help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." />
160 <param name="solver" type="select" label="Which solver to use" help="The 'approximate' solver may return negative values">
161 <option value="exact">"exact", the implementation described in van Dijk et al. (2018)</option>
162 <option value="approximate">"approximate", a faster implementation that performs imputation in the PCA space and then projects back to the gene space</option>
163 </param>
164 <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html">
165 <expand macro="distance_metric_options"/>
166 </param>
167 <expand macro="param_random_state"/>
168 </when>
122 </conditional> 169 </conditional>
123 <expand macro="inputs_common_advanced"/> 170 <expand macro="inputs_common_advanced"/>
124 </inputs> 171 </inputs>
125 <outputs> 172 <outputs>
126 <expand macro="anndata_outputs"/> 173 <expand macro="anndata_outputs"/>
215 <has_text_matching expression="log=True"/> 262 <has_text_matching expression="log=True"/>
216 </assert_contents> 263 </assert_contents>
217 </output> 264 </output>
218 <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/> 265 <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/>
219 </test> 266 </test>
267 <test expect_num_outputs="2">
268 <!-- test 5: external.pp.magic with name_list=all_genes and t=-1; the rendered script is expected to contain t='auto' and n_pca=5 -->
269 <param name="adata" value="krumsiek11.h5ad" />
270 <conditional name="method">
271 <param name="method" value="external.pp.magic"/>
272 <param name="name_list" value="all_genes"/>
273 <param name="t" value="-1"/>
274 <param name="n_pca" value="5"/>
275 </conditional>
276 <section name="advanced_common">
277 <param name="show_log" value="true" />
278 </section>
279 <output name="hidden_output">
280 <assert_contents>
281 <has_text_matching expression="external.pp.magic"/>
282 <has_text_matching expression="name_list='all_genes'"/>
283 <has_text_matching expression="t='auto'"/>
284 <has_text_matching expression="n_pca=5"/>
285 </assert_contents>
286 </output>
287 <output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
288 </test>
289 <test expect_num_outputs="2">
290 <!-- test 6: external.pp.magic with name_list=pca_only and a fixed t=3; additionally checks that stdout mentions X_magic -->
291 <param name="adata" value="krumsiek11.h5ad" />
292 <conditional name="method">
293 <param name="method" value="external.pp.magic"/>
294 <param name="name_list" value="pca_only"/>
295 <param name="t" value="3"/>
296 <param name="n_pca" value="5"/>
297 </conditional>
298 <section name="advanced_common">
299 <param name="show_log" value="true" />
300 </section>
301 <output name="hidden_output">
302 <assert_contents>
303 <has_text_matching expression="external.pp.magic"/>
304 <has_text_matching expression="name_list='pca_only'"/>
305 <has_text_matching expression="t=3"/>
306 <has_text_matching expression="n_pca=5"/>
307 </assert_contents>
308 </output>
309 <output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
310 <assert_stdout>
311 <has_text text="X_magic"/>
312 </assert_stdout>
313 </test>
220 </tests> 314 </tests>
221 <help><![CDATA[ 315 <help><![CDATA[
222 Normalize total counts per cell (`pp.normalize_per_cell`) 316 Normalize total counts per cell (`pp.normalize_per_cell`)
223 ========================================================= 317 =========================================================
224 318
267 Expects non-logarithmized data. If using logarithmized data, pass `log=False`. 361 Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
268 362
269 More details on the `scanpy documentation 363 More details on the `scanpy documentation
270 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_seurat.html>`__ 364 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_seurat.html>`__
271 365
366
367 Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`)
368 ============================================================================================================
369
370 MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold.
371
372 The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018).
373
374 - Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability.
375 - Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements.
376
377 More details on the `scanpy documentation
378 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.external.pp.magic.html>`__
379
272 ]]></help> 380 ]]></help>
273 <expand macro="citations"/> 381 <expand macro="citations"/>
274 </tool> 382 </tool>