Mercurial > repos > iuc > scanpy_normalize
diff normalize.xml @ 1:a9f14e2d1655 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author | iuc |
---|---|
date | Wed, 16 Oct 2019 06:31:10 -0400 |
parents | ed64c90a9b93 |
children | 8e0f141c8c66 |
line wrap: on
line diff
--- a/normalize.xml Mon Mar 04 10:16:12 2019 -0500 +++ b/normalize.xml Wed Oct 16 06:31:10 2019 -0400 @@ -1,5 +1,5 @@ -<tool id="scanpy_normalize" name="Normalize with scanpy" version="@galaxy_version@"> - <description></description> +<tool id="scanpy_normalize" name="Normalize" version="@galaxy_version@"> + <description>with scanpy</description> <macros> <import>macros.xml</import> </macros> @@ -13,26 +13,36 @@ @CMD_imports@ @CMD_read_inputs@ -#if $method.method == "pp.normalize_per_cell" -sc.pp.normalize_per_cell( - data=adata, - #if $method.counts_per_cell_after - counts_per_cell_after=$method.counts_per_cell_after, +#if $method.method == "pp.normalize_total" +sc.pp.normalize_total( + adata, + #if str($method.target_sum)!= '' + target_sum=$method.target_sum, + #end if + exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed, + #if $method.exclude_highly_expressed.exclude_highly_expressed == "True" + max_fraction=$method.exclude_highly_expressed.max_fraction, #end if - #if $method.counts_per_cell - counts_per_cell=np.loadtxt('$method.counts_per_cell'), + key_added='$method.key_added', + #if str($method.layers) != 'all' + layers[str(x.strip()) for x in str($method.layers).split(',')], + #else + layers='$method.layers', #end if - key_n_counts='$method.key_n_counts', - copy=False) -adata.obs.to_csv('$anndata_obs', sep='\t') -#elif $method.method == "pp.recipe_zheng17" + #if str($method.layer_norm) != "None" + layer_norm='$method.layer_norm', + #end if + inplace=True) + +#else if $method.method == "pp.recipe_zheng17" sc.pp.recipe_zheng17( adata=adata, n_top_genes=$method.n_top_genes, log=$method.log, plot=False, copy=False) -#elif $method.method == "pp.recipe_weinreb17" + +#else if $method.method == "pp.recipe_weinreb17" sc.pp.recipe_weinreb17( adata=adata, log=$method.log, @@ -42,34 +52,14 @@ svd_solver='$method.svd_solver', random_state=$method.random_state, copy=False) -#elif $method.method == "pp.recipe_seurat" + +#else if 
$method.method == "pp.recipe_seurat" sc.pp.recipe_seurat( adata=adata, log=$method.log, plot=False, copy=False) -#elif $method.method == "pp.log1p" -sc.pp.log1p( - data=adata, - copy=False) -#elif $method.method == "pp.scale" -sc.pp.scale( - data=adata, - zero_center=$method.zero_center, - #if $method.max_value - max_value=$method.max_value, - #end if - copy=False) -#elif $method.method == "pp.sqrt" -sc.pp.sqrt( - data=adata, - copy=False) -#elif $method.method == "pp.downsample_counts" -sc.pp.downsample_counts( - adata=adata, - target_counts=$method.target_counts, - random_state=$method.random_state, - copy=False) + #end if @CMD_anndata_write_outputs@ @@ -79,20 +69,31 @@ <inputs> <expand macro="inputs_anndata"/> <conditional name="method"> - <param argument="method" type="select" label="Method used for plotting"> - <option value="pp.normalize_per_cell">Normalize total counts per cell, using `pp.normalize_per_cell`</option> + <param argument="method" type="select" label="Method used for normalization"> + <option value="pp.normalize_total">Normalize counts per cell, using `pp.normalize_total`</option> <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. 
(2017), using `pp.recipe_zheng17`</option> <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using `pp.recipe_weinreb17`</option> <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using `pp.recipe_seurat`</option> - <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option> - <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option> - <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option> - <option value="pp.downsample_counts">Downsample counts, using `pp.downsample_counts`</option> </param> - <when value="pp.normalize_per_cell"> - <param argument="counts_per_cell_after" type="float" value="" optional="true" label="Counts per cell after" help="If not provided, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization."/> - <param argument="counts_per_cell" type="data" format="tabular,txt" optional="true" label="Precomputed counts per cell" help=""/> - <param argument="key_n_counts" type="text" value="n_counts" label="Name of the field in `adata.obs` where the total counts per cell will be stored" help=""/> + <when value="pp.normalize_total"> + <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> + <conditional name="exclude_highly_expressed"> + <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. 
The not-excluded genes will sum up to target_sum"> + <option value="True">Yes</option> + <option value="False" selected="true">No</option> + </param> + <when value="True"> + <param argument="max_fraction" type="float" value="0.05" label="Maximum fraction" help="A gene is considered highly expressed, and is excluded from the computation of the normalization factor, if it has more than this fraction of the total counts in at least one cell."/> + </when> + <when value="False"/> + </conditional> + <param argument="key_added" type="text" value="n_counts" label="Name of the field in `adata.obs` where the normalization factor is stored" help=""/> + <param argument="layers" type="text" value="all" label="List of layers to normalize" help="'all' will normalize all layers. The list should be comma-separated."/> + <param argument="layer_norm" type="select" label="How to normalize layers?"> + <option value="None">None: after normalization, for each layer in layers each cell has a total count equal to the median of the total counts (cells) before normalization of the layer.</option> + <option value="after">After: for each layer in layers each cell has a total count equal to target_sum.</option> + <option value="X">X: for each layer in layers each cell has a total count equal to the median of total counts for observations (cells) of adata.X before normalization.</option> + </param> </when> <when value="pp.recipe_zheng17"> <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/> @@ -109,73 +110,50 @@ <when value="pp.recipe_seurat"> <expand macro="param_log"/> </when> - <when value="pp.log1p"/> - <when value="pp.scale"> - <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" - label="Zero center?" 
help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/> - <param argument="max_value" type="float" value="" optional="true" label="Maximum value" - help="Clip (truncate) to this value after scaling. If not set, it does not clip."/> - </when> - <when value="pp.sqrt"/> - <when value="pp.downsample_counts"> - <param argument="target_counts" type="integer" min="0" value="20000" - label="Target number of counts for downsampling" help="Cells with more counts than 'target_counts' will be downsampled to have 'target_counts' counts."/> - <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/> - </when> </conditional> - <expand macro="anndata_output_format"/> </inputs> <outputs> <expand macro="anndata_outputs"/> - <data name="anndata_obs" format="tabular" label="${tool.name} on ${on_string}: Annotation of observations"> - <filter>method['method'] == 'pp.normalize_per_cell'</filter> - </data> </outputs> <tests> <test> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" /> - </conditional> + <!-- test 1 --> + <param name="adata" value="krumsiek11.h5ad" /> <conditional name="method"> - <param name="method" value="pp.normalize_per_cell"/> - <param name="counts_per_cell_after" value="2"/> - <param name="counts_per_cell" value="krumsiek11_counts_per_cell"/> - <param name="key_n_counts" value="n_counts"/> + <param name="method" value="pp.normalize_total"/> + <conditional name="exclude_highly_expressed"> + <param name="exclude_highly_expressed" value="False"/> + </conditional> + <param name="key_added" value="n_counts"/> + <param name="layers" value="all"/> + <param name="layer_norm" value="None"/> </conditional> - <param name="anndata_output_format" value="h5ad"/> <assert_stdout> - <has_text_matching expression="sc.pp.normalize_per_cell"/> - <has_text_matching expression="counts_per_cell_after=2.0"/> - <has_text_matching 
expression="counts_per_cell=np.loadtxt"/> - <has_text_matching expression="key_n_counts='n_counts'"/> + <has_text_matching expression="sc.pp.normalize_total"/> + <has_text_matching expression="exclude_highly_expressed=False"/> + <has_text_matching expression="key_added='n_counts'"/> + <has_text_matching expression="layers='all'"/> </assert_stdout> - <output name="anndata_out_h5ad" file="pp.normalize_per_cell.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> - <output name="anndata_obs" file="pp.normalize_per_cell.obs.krumsiek11.tabular"/> + <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> </test> <test> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="random-randint.h5ad"/> - </conditional> + <!-- test 2 --> + <param name="adata" value="random-randint.h5ad"/> <conditional name="method"> <param name="method" value="pp.recipe_zheng17"/> <param name="n_top_genes" value="1000"/> <param name="log" value="True"/> </conditional> - <param name="anndata_output_format" value="h5ad"/> <assert_stdout> <has_text_matching expression="sc.pp.recipe_zheng17"/> <has_text_matching expression="n_top_genes=1000"/> <has_text_matching expression="log=True"/> </assert_stdout> - <output name="anndata_out_h5ad" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5" compare="sim_size"/> + <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size"/> </test> <test> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="paul15_subsample.h5ad" /> - </conditional> + <!-- test 3 --> + <param name="adata" value="paul15_subsample.h5ad" /> <conditional name="method"> <param name="method" value="pp.recipe_weinreb17"/> <param name="log" value="True"/> @@ -185,7 +163,6 @@ <param name="svd_solver" value="randomized"/> <param name="random_state" value="0"/> </conditional> - <param name="anndata_output_format" 
value="h5ad" /> <assert_stdout> <has_text_matching expression="sc.pp.recipe_weinreb17"/> <has_text_matching expression="log=True"/> @@ -195,108 +172,22 @@ <has_text_matching expression="svd_solver='randomized'"/> <has_text_matching expression="random_state=0"/> </assert_stdout> - <output name="anndata_out_h5ad" file="pp.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/> + <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/> </test> <test> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> - </conditional> + <!-- test 4 --> + <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> <conditional name="method"> <param name="method" value="pp.recipe_seurat"/> <param name="log" value="True"/> </conditional> - <param name="anndata_output_format" value="h5ad"/> <assert_stdout> <has_text_matching expression="sc.pp.recipe_seurat"/> <has_text_matching expression="log=True"/> </assert_stdout> - <output name="anndata_out_h5ad" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5" compare="sim_size"/> - </test> - <test> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" /> - </conditional> - <conditional name="method"> - <param name="method" value="pp.log1p"/> - </conditional> - <param name="anndata_output_format" value="h5ad" /> - <assert_stdout> - <has_text_matching expression="sc.pp.log1p"/> - </assert_stdout> - <output name="anndata_out_h5ad" file="pp.log1p.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> - </test> - <test> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" /> - </conditional> - <conditional name="method"> - <param name="method" value="pp.scale"/> - <param name="zero_center" value="true"/> - </conditional> - <param name="anndata_output_format" 
value="h5ad" /> - <assert_stdout> - <has_text_matching expression="sc.pp.scale"/> - <has_text_matching expression="zero_center=True"/> - </assert_stdout> - <output name="anndata_out_h5ad" file="pp.scale.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> + <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" /> - </conditional> - <conditional name="method"> - <param name="method" value="pp.scale"/> - <param name="zero_center" value="true"/> - <param name="max_value" value="10"/> - </conditional> - <param name="anndata_output_format" value="h5ad" /> - <assert_stdout> - <has_text_matching expression="sc.pp.scale"/> - <has_text_matching expression="zero_center=True"/> - <has_text_matching expression="max_value=10.0"/> - </assert_stdout> - <output name="anndata_out_h5ad" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> - </test> - <test> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" /> - </conditional> - <conditional name="method"> - <param name="method" value="pp.sqrt"/> - </conditional> - <param name="anndata_output_format" value="h5ad" /> - <assert_stdout> - <has_text_matching expression="sc.pp.sqrt"/> - </assert_stdout> - <output name="anndata_out_h5ad" file="pp.sqrt.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> - </test> - <test> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="random-randint.h5ad" /> - </conditional> - <conditional name="method"> - <param name="method" value="pp.downsample_counts"/> - <param name="target_counts" value="20000"/> - <param name="random_state" value="0"/> - </conditional> - <param name="anndata_output_format" value="h5ad" /> - <assert_stdout> - <has_text_matching expression="sc.pp.downsample_counts"/> - 
<has_text_matching expression="target_counts=20000"/> - <has_text_matching expression="random_state=0"/> - </assert_stdout> - <output name="anndata_out_h5ad" ftype="h5"> - <assert_contents> - <has_h5_keys keys="X, obs, var" /> - </assert_contents> - </output> - </test> + </tests> <help><![CDATA[ Normalize total counts per cell (`pp.normalize_per_cell`) @@ -308,7 +199,7 @@ Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.normalize_per_cell.html>`__ +<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.normalize_per_cell.html>`__ Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) @@ -327,7 +218,7 @@ - scale to unit variance and shift to zero mean More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_zheng17.html>`__ +<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_zheng17.html>`__ Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`) @@ -336,7 +227,7 @@ Expects non-logarithmized data. If using logarithmized data, pass `log=False`. More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_weinreb17.html>`__ +<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_weinreb17.html>`__ Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`) @@ -347,33 +238,7 @@ Expects non-logarithmized data. If using logarithmized data, pass `log=False`. 
More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_seurat.html>`__ - -Logarithmize the data matrix (`pp.log1p`) -========================================= - -More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.log1p.html>`__ - -Scale data to unit variance and zero mean (`pp.scale`) -====================================================== - -More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.scale.html>`__ - -Computes the square root the data matrix (`pp.sqrt`) -==================================================== - -`X = sqrt(X)` - -Downsample counts (`pp.downsample_counts`) -========================================== - -Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This -has been implemented by M. D. Luecken. - -More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.downsample_counts.html>`__ +<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_seurat.html>`__ ]]></help> <expand macro="citations"/>