Mercurial > repos > iuc > scanpy_normalize
diff normalize.xml @ 0:ed64c90a9b93 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author | iuc |
---|---|
date | Mon, 04 Mar 2019 10:16:12 -0500 |
parents | |
children | a9f14e2d1655 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/normalize.xml Mon Mar 04 10:16:12 2019 -0500 @@ -0,0 +1,380 @@ +<tool id="scanpy_normalize" name="Normalize with scanpy" version="@galaxy_version@"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="version_command"/> + <command detect_errors="exit_code"><![CDATA[ +@CMD@ + ]]></command> + <configfiles> + <configfile name="script_file"><![CDATA[ +@CMD_imports@ +@CMD_read_inputs@ + +#if $method.method == "pp.normalize_per_cell" +sc.pp.normalize_per_cell( + data=adata, + #if $method.counts_per_cell_after + counts_per_cell_after=$method.counts_per_cell_after, + #end if + #if $method.counts_per_cell + counts_per_cell=np.loadtxt('$method.counts_per_cell'), + #end if + key_n_counts='$method.key_n_counts', + copy=False) +adata.obs.to_csv('$anndata_obs', sep='\t') +#elif $method.method == "pp.recipe_zheng17" +sc.pp.recipe_zheng17( + adata=adata, + n_top_genes=$method.n_top_genes, + log=$method.log, + plot=False, + copy=False) +#elif $method.method == "pp.recipe_weinreb17" +sc.pp.recipe_weinreb17( + adata=adata, + log=$method.log, + mean_threshold=$method.mean_threshold, + cv_threshold=$method.cv_threshold, + n_pcs=$method.n_pcs, + svd_solver='$method.svd_solver', + random_state=$method.random_state, + copy=False) +#elif $method.method == "pp.recipe_seurat" +sc.pp.recipe_seurat( + adata=adata, + log=$method.log, + plot=False, + copy=False) +#elif $method.method == "pp.log1p" +sc.pp.log1p( + data=adata, + copy=False) +#elif $method.method == "pp.scale" +sc.pp.scale( + data=adata, + zero_center=$method.zero_center, + #if $method.max_value + max_value=$method.max_value, + #end if + copy=False) +#elif $method.method == "pp.sqrt" +sc.pp.sqrt( + data=adata, + copy=False) +#elif $method.method == "pp.downsample_counts" +sc.pp.downsample_counts( + adata=adata, + target_counts=$method.target_counts, + random_state=$method.random_state, + copy=False) +#end if + +@CMD_anndata_write_outputs@ + +]]></configfile> + </configfiles> + <inputs> + <expand macro="inputs_anndata"/> + <conditional name="method"> + <param argument="method" type="select" label="Method used for plotting"> + <option value="pp.normalize_per_cell">Normalize total counts per cell, using `pp.normalize_per_cell`</option> + <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using `pp.recipe_zheng17`</option> + <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using `pp.recipe_weinreb17`</option> + <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using `pp.recipe_seurat`</option> + <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option> + <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option> + <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option> + <option value="pp.downsample_counts">Downsample counts, using `pp.downsample_counts`</option> + </param> + <when value="pp.normalize_per_cell"> + <param argument="counts_per_cell_after" type="float" value="" optional="true" label="Counts per cell after" help="If not provided, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization."/> + <param argument="counts_per_cell" type="data" format="tabular,txt" optional="true" label="Precomputed counts per cell" help=""/> + <param argument="key_n_counts" type="text" value="n_counts" label="Name of the field in `adata.obs` where the total counts per cell will be stored" help=""/> + </when> + <when value="pp.recipe_zheng17"> + <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/> + <expand macro="param_log"/> + </when> + <when value="pp.recipe_weinreb17"> + <expand macro="param_log"/> + <param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/> + <param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/> + <param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal component" help=""/> + <expand macro="svd_solver"/> + <expand macro="pca_random_state"/> + </when> + <when value="pp.recipe_seurat"> + <expand macro="param_log"/> + </when> + <when value="pp.log1p"/> + <when value="pp.scale"> + <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" + label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/> + <param argument="max_value" type="float" value="" optional="true" label="Maximum value" + help="Clip (truncate) to this value after scaling. If not set, it does not clip."/> + </when> + <when value="pp.sqrt"/> + <when value="pp.downsample_counts"> + <param argument="target_counts" type="integer" min="0" value="20000" + label="Target number of counts for downsampling" help="Cells with more counts than 'target_counts' will be downsampled to have 'target_counts' counts."/> + <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/> + </when> + </conditional> + <expand macro="anndata_output_format"/> + </inputs> + <outputs> + <expand macro="anndata_outputs"/> + <data name="anndata_obs" format="tabular" label="${tool.name} on ${on_string}: Annotation of observations"> + <filter>method['method'] == 'pp.normalize_per_cell'</filter> + </data> + </outputs> + <tests> + <test> + <conditional name="input"> + <param name="format" value="h5ad" /> + <param name="adata" value="krumsiek11.h5ad" /> + </conditional> + <conditional name="method"> + <param name="method" value="pp.normalize_per_cell"/> + <param name="counts_per_cell_after" value="2"/> + <param name="counts_per_cell" value="krumsiek11_counts_per_cell"/> + <param name="key_n_counts" value="n_counts"/> + </conditional> + <param name="anndata_output_format" value="h5ad"/> + <assert_stdout> + <has_text_matching expression="sc.pp.normalize_per_cell"/> + <has_text_matching expression="counts_per_cell_after=2.0"/> + <has_text_matching expression="counts_per_cell=np.loadtxt"/> + <has_text_matching expression="key_n_counts='n_counts'"/> + </assert_stdout> + <output name="anndata_out_h5ad" file="pp.normalize_per_cell.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> + <output name="anndata_obs" file="pp.normalize_per_cell.obs.krumsiek11.tabular"/> + </test> + <test> + <conditional name="input"> + <param name="format" value="h5ad" /> + <param name="adata" value="random-randint.h5ad"/> + </conditional> + <conditional name="method"> + <param name="method" value="pp.recipe_zheng17"/> + <param name="n_top_genes" value="1000"/> + <param name="log" value="True"/> + </conditional> + <param name="anndata_output_format" value="h5ad"/> + <assert_stdout> + <has_text_matching expression="sc.pp.recipe_zheng17"/> + <has_text_matching expression="n_top_genes=1000"/> + <has_text_matching expression="log=True"/> + </assert_stdout> + <output name="anndata_out_h5ad" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5" compare="sim_size"/> + </test> + <test> + <conditional name="input"> + <param name="format" value="h5ad" /> + <param name="adata" value="paul15_subsample.h5ad" /> + </conditional> + <conditional name="method"> + <param name="method" value="pp.recipe_weinreb17"/> + <param name="log" value="True"/> + <param name="mean_threshold" value="0.01"/> + <param name="cv_threshold" value="2.0"/> + <param name="n_pcs" value="50"/> + <param name="svd_solver" value="randomized"/> + <param name="random_state" value="0"/> + </conditional> + <param name="anndata_output_format" value="h5ad" /> + <assert_stdout> + <has_text_matching expression="sc.pp.recipe_weinreb17"/> + <has_text_matching expression="log=True"/> + <has_text_matching expression="mean_threshold=0.01"/> + <has_text_matching expression="cv_threshold=2.0"/> + <has_text_matching expression="n_pcs=50"/> + <has_text_matching expression="svd_solver='randomized'"/> + <has_text_matching expression="random_state=0"/> + </assert_stdout> + <output name="anndata_out_h5ad" file="pp.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/> + </test> + <test> + <conditional name="input"> + <param name="format" value="h5ad" /> + <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> + </conditional> + <conditional name="method"> + <param name="method" value="pp.recipe_seurat"/> + <param name="log" value="True"/> + </conditional> + <param name="anndata_output_format" value="h5ad"/> + <assert_stdout> + <has_text_matching expression="sc.pp.recipe_seurat"/> + <has_text_matching expression="log=True"/> + </assert_stdout> + <output name="anndata_out_h5ad" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5" compare="sim_size"/> + </test> + <test> + <conditional name="input"> + <param name="format" value="h5ad" /> + <param name="adata" value="krumsiek11.h5ad" /> + </conditional> + <conditional name="method"> + <param name="method" value="pp.log1p"/> + </conditional> + <param name="anndata_output_format" value="h5ad" /> + <assert_stdout> + <has_text_matching expression="sc.pp.log1p"/> + </assert_stdout> + <output name="anndata_out_h5ad" file="pp.log1p.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> + </test> + <test> + <conditional name="input"> + <param name="format" value="h5ad" /> + <param name="adata" value="krumsiek11.h5ad" /> + </conditional> + <conditional name="method"> + <param name="method" value="pp.scale"/> + <param name="zero_center" value="true"/> + </conditional> + <param name="anndata_output_format" value="h5ad" /> + <assert_stdout> + <has_text_matching expression="sc.pp.scale"/> + <has_text_matching expression="zero_center=True"/> + </assert_stdout> + <output name="anndata_out_h5ad" file="pp.scale.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> + </test> + <test> + <conditional name="input"> + <param name="format" value="h5ad" /> + <param name="adata" value="krumsiek11.h5ad" /> + </conditional> + <conditional name="method"> + <param name="method" value="pp.scale"/> + <param name="zero_center" value="true"/> + <param name="max_value" value="10"/> + </conditional> + <param name="anndata_output_format" value="h5ad" /> + <assert_stdout> + <has_text_matching expression="sc.pp.scale"/> + <has_text_matching expression="zero_center=True"/> + <has_text_matching expression="max_value=10.0"/> + </assert_stdout> + <output name="anndata_out_h5ad" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> + </test> + <test> + <conditional name="input"> + <param name="format" value="h5ad" /> + <param name="adata" value="krumsiek11.h5ad" /> + </conditional> + <conditional name="method"> + <param name="method" value="pp.sqrt"/> + </conditional> + <param name="anndata_output_format" value="h5ad" /> + <assert_stdout> + <has_text_matching expression="sc.pp.sqrt"/> + </assert_stdout> + <output name="anndata_out_h5ad" file="pp.sqrt.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> + </test> + <test> + <conditional name="input"> + <param name="format" value="h5ad" /> + <param name="adata" value="random-randint.h5ad" /> + </conditional> + <conditional name="method"> + <param name="method" value="pp.downsample_counts"/> + <param name="target_counts" value="20000"/> + <param name="random_state" value="0"/> + </conditional> + <param name="anndata_output_format" value="h5ad" /> + <assert_stdout> + <has_text_matching expression="sc.pp.downsample_counts"/> + <has_text_matching expression="target_counts=20000"/> + <has_text_matching expression="random_state=0"/> + </assert_stdout> + <output name="anndata_out_h5ad" ftype="h5"> + <assert_contents> + <has_h5_keys keys="X, obs, var" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +Normalize total counts per cell (`pp.normalize_per_cell`) +========================================================= + +Normalize each cell by total counts over all genes, so that every cell has +the same total count after normalization. + +Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. + +More details on the `scanpy documentation +<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.normalize_per_cell.html>`__ + + +Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) +================================================================================================================== + +Expects non-logarithmized data. If using logarithmized data, pass `log=False`. + +The recipe runs the following steps: + +- only consider genes with more than 1 count +- normalize with total UMI count per cell +- select highly-variable genes +- subset the genes +- renormalize after filtering +- log transform (if needed) +- scale to unit variance and shift to zero mean + +More details on the `scanpy documentation +<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_zheng17.html>`__ + + +Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`) +============================================================================== + +Expects non-logarithmized data. If using logarithmized data, pass `log=False`. + +More details on the `scanpy documentation +<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_weinreb17.html>`__ + + +Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`) +========================================================================== + +This uses a particular preprocessing. + +Expects non-logarithmized data. If using logarithmized data, pass `log=False`. + +More details on the `scanpy documentation +<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_seurat.html>`__ + +Logarithmize the data matrix (`pp.log1p`) +========================================= + +More details on the `scanpy documentation +<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.log1p.html>`__ + +Scale data to unit variance and zero mean (`pp.scale`) +====================================================== + +More details on the `scanpy documentation +<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.scale.html>`__ + +Computes the square root the data matrix (`pp.sqrt`) +==================================================== + +`X = sqrt(X)` + +Downsample counts (`pp.downsample_counts`) +========================================== + +Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This +has been implemented by M. D. Luecken. + +More details on the `scanpy documentation +<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.downsample_counts.html>`__ + + ]]></help> + <expand macro="citations"/> +</tool>