scanpy_normalize: normalize.xml comparison

comparison normalize.xml @ 0:ed64c90a9b93 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6

author	iuc
date	Mon, 04 Mar 2019 10:16:12 -0500
parents
children	a9f14e2d1655

comparison

equal deleted inserted replaced

--1:000000000000
+:ed64c90a9b93
+<tool id="scanpy_normalize" name="Normalize with scanpy" version="@galaxy_version@">
+<description></description>
+<macros><!-- Shared definitions: the @...@ tokens and the <expand> macros used in this file are presumably declared in macros.xml (not visible here; confirm there) -->
+<import>macros.xml</import>
+</macros>
+<expand macro="requirements"/>
+<expand macro="version_command"/>
+<command detect_errors="exit_code"><![CDATA[
+@CMD@
+]]></command><!-- The whole command line comes from the @CMD@ token; a non-zero exit code marks the job as failed -->
+<configfiles>
+<!-- Cheetah template rendered into the Python script that runs the scanpy
+     preprocessing function chosen in the "method" conditional. Reading the
+     input and writing the output are delegated to @CMD_*@ tokens. -->
+<configfile name="script_file"><![CDATA[
+@CMD_imports@
+@CMD_read_inputs@
+#if $method.method == "pp.normalize_per_cell"
+sc.pp.normalize_per_cell(
+data=adata,
+## Optional float: compare the string form so that an explicit 0 is still passed on
+#if str($method.counts_per_cell_after) != ''
+counts_per_cell_after=$method.counts_per_cell_after,
+#end if
+## Optional dataset: falsy when no file was selected
+#if $method.counts_per_cell
+counts_per_cell=np.loadtxt('$method.counts_per_cell'),
+#end if
+key_n_counts='$method.key_n_counts',
+copy=False)
+## Export the per-cell annotations to the extra tabular output of this method
+adata.obs.to_csv('$anndata_obs', sep='\t')
+#elif $method.method == "pp.recipe_zheng17"
+sc.pp.recipe_zheng17(
+adata=adata,
+n_top_genes=$method.n_top_genes,
+log=$method.log,
+plot=False,
+copy=False)
+#elif $method.method == "pp.recipe_weinreb17"
+sc.pp.recipe_weinreb17(
+adata=adata,
+log=$method.log,
+mean_threshold=$method.mean_threshold,
+cv_threshold=$method.cv_threshold,
+n_pcs=$method.n_pcs,
+svd_solver='$method.svd_solver',
+random_state=$method.random_state,
+copy=False)
+#elif $method.method == "pp.recipe_seurat"
+sc.pp.recipe_seurat(
+adata=adata,
+log=$method.log,
+plot=False,
+copy=False)
+#elif $method.method == "pp.log1p"
+sc.pp.log1p(
+data=adata,
+copy=False)
+#elif $method.method == "pp.scale"
+sc.pp.scale(
+data=adata,
+zero_center=$method.zero_center,
+## Optional float: compare the string form so that max_value=0 is not silently dropped
+#if str($method.max_value) != ''
+max_value=$method.max_value,
+#end if
+copy=False)
+#elif $method.method == "pp.sqrt"
+sc.pp.sqrt(
+data=adata,
+copy=False)
+#elif $method.method == "pp.downsample_counts"
+sc.pp.downsample_counts(
+adata=adata,
+target_counts=$method.target_counts,
+random_state=$method.random_state,
+copy=False)
+#end if
+@CMD_anndata_write_outputs@
+]]></configfile>
+</configfiles>
+<inputs>
+<!-- Input AnnData (via macro), then one conditional whose selected value is the
+     scanpy preprocessing function to run; each <when> holds that function's parameters. -->
+<expand macro="inputs_anndata"/>
+<conditional name="method">
+<param argument="method" type="select" label="Method used for normalization">
+<option value="pp.normalize_per_cell">Normalize total counts per cell, using `pp.normalize_per_cell`</option>
+<option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using `pp.recipe_zheng17`</option>
+<option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al. (2017), using `pp.recipe_weinreb17`</option>
+<option value="pp.recipe_seurat">Normalization and filtering as of Seurat (Satija et al., 2015), using `pp.recipe_seurat`</option>
+<option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option>
+<option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option>
+<option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option>
+<option value="pp.downsample_counts">Downsample counts, using `pp.downsample_counts`</option>
+</param>
+<when value="pp.normalize_per_cell">
+<param argument="counts_per_cell_after" type="float" value="" optional="true" label="Counts per cell after" help="If not provided, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization."/>
+<param argument="counts_per_cell" type="data" format="tabular,txt" optional="true" label="Precomputed counts per cell" help=""/>
+<param argument="key_n_counts" type="text" value="n_counts" label="Name of the field in `adata.obs` where the total counts per cell will be stored" help=""/>
+</when>
+<when value="pp.recipe_zheng17">
+<param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/>
+<expand macro="param_log"/>
+</when>
+<when value="pp.recipe_weinreb17">
+<expand macro="param_log"/>
+<param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/>
+<param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/>
+<param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal components" help=""/>
+<expand macro="svd_solver"/>
+<expand macro="pca_random_state"/>
+</when>
+<when value="pp.recipe_seurat">
+<expand macro="param_log"/>
+</when>
+<when value="pp.log1p"/>
+<when value="pp.scale">
+<param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
+label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
+<param argument="max_value" type="float" value="" optional="true" label="Maximum value"
+help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
+</when>
+<when value="pp.sqrt"/>
+<when value="pp.downsample_counts">
+<param argument="target_counts" type="integer" min="0" value="20000"
+label="Target number of counts for downsampling" help="Cells with more counts than 'target_counts' will be downsampled to have 'target_counts' counts."/>
+<param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/>
+</when>
+</conditional>
+<expand macro="anndata_output_format"/>
+</inputs>
+<outputs><!-- AnnData outputs come from the anndata_outputs macro; anndata_obs is an extra table -->
+<expand macro="anndata_outputs"/>
+<data name="anndata_obs" format="tabular" label="${tool.name} on ${on_string}: Annotation of observations"><!-- Only created for pp.normalize_per_cell, the one branch of the script that writes adata.obs to $anndata_obs -->
+<filter>method['method'] == 'pp.normalize_per_cell'</filter>
+</data>
+</outputs>
+<tests>
+<test><!-- pp.normalize_per_cell on krumsiek11 with counts_per_cell_after=2 and a precomputed counts file; checks stdout text, the h5ad output by size and the obs table -->
+<conditional name="input">
+<param name="format" value="h5ad" />
+<param name="adata" value="krumsiek11.h5ad" />
+</conditional>
+<conditional name="method">
+<param name="method" value="pp.normalize_per_cell"/>
+<param name="counts_per_cell_after" value="2"/>
+<param name="counts_per_cell" value="krumsiek11_counts_per_cell"/>
+<param name="key_n_counts" value="n_counts"/>
+</conditional>
+<param name="anndata_output_format" value="h5ad"/>
+<assert_stdout>
+<has_text_matching expression="sc.pp.normalize_per_cell"/>
+<has_text_matching expression="counts_per_cell_after=2.0"/>
+<has_text_matching expression="counts_per_cell=np.loadtxt"/>
+<has_text_matching expression="key_n_counts='n_counts'"/>
+</assert_stdout>
+<output name="anndata_out_h5ad" file="pp.normalize_per_cell.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+<output name="anndata_obs" file="pp.normalize_per_cell.obs.krumsiek11.tabular"/>
+</test>
+<test><!-- pp.recipe_zheng17 on random-randint with n_top_genes=1000 and log=True; checks stdout text and the h5ad output by size -->
+<conditional name="input">
+<param name="format" value="h5ad" />
+<param name="adata" value="random-randint.h5ad"/>
+</conditional>
+<conditional name="method">
+<param name="method" value="pp.recipe_zheng17"/>
+<param name="n_top_genes" value="1000"/>
+<param name="log" value="True"/>
+</conditional>
+<param name="anndata_output_format" value="h5ad"/>
+<assert_stdout>
+<has_text_matching expression="sc.pp.recipe_zheng17"/>
+<has_text_matching expression="n_top_genes=1000"/>
+<has_text_matching expression="log=True"/>
+</assert_stdout>
+<output name="anndata_out_h5ad" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5" compare="sim_size"/>
+</test>
+<test><!-- pp.recipe_weinreb17 on paul15_subsample with every parameter set explicitly; checks stdout text and the h5ad output by size -->
+<conditional name="input">
+<param name="format" value="h5ad" />
+<param name="adata" value="paul15_subsample.h5ad" />
+</conditional>
+<conditional name="method">
+<param name="method" value="pp.recipe_weinreb17"/>
+<param name="log" value="True"/>
+<param name="mean_threshold" value="0.01"/>
+<param name="cv_threshold" value="2.0"/>
+<param name="n_pcs" value="50"/>
+<param name="svd_solver" value="randomized"/>
+<param name="random_state" value="0"/>
+</conditional>
+<param name="anndata_output_format" value="h5ad" />
+<assert_stdout>
+<has_text_matching expression="sc.pp.recipe_weinreb17"/>
+<has_text_matching expression="log=True"/>
+<has_text_matching expression="mean_threshold=0.01"/>
+<has_text_matching expression="cv_threshold=2.0"/>
+<has_text_matching expression="n_pcs=50"/>
+<has_text_matching expression="svd_solver='randomized'"/>
+<has_text_matching expression="random_state=0"/>
+</assert_stdout>
+<output name="anndata_out_h5ad" file="pp.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/>
+</test>
+<test><!-- pp.recipe_seurat with log=True, using the zheng17 reference output file as input; checks stdout text and the h5ad output by size -->
+<conditional name="input">
+<param name="format" value="h5ad" />
+<param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" />
+</conditional>
+<conditional name="method">
+<param name="method" value="pp.recipe_seurat"/>
+<param name="log" value="True"/>
+</conditional>
+<param name="anndata_output_format" value="h5ad"/>
+<assert_stdout>
+<has_text_matching expression="sc.pp.recipe_seurat"/>
+<has_text_matching expression="log=True"/>
+</assert_stdout>
+<output name="anndata_out_h5ad" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5" compare="sim_size"/>
+</test>
+<test><!-- pp.log1p on krumsiek11 (no parameters); checks stdout text and the h5ad output by size -->
+<conditional name="input">
+<param name="format" value="h5ad" />
+<param name="adata" value="krumsiek11.h5ad" />
+</conditional>
+<conditional name="method">
+<param name="method" value="pp.log1p"/>
+</conditional>
+<param name="anndata_output_format" value="h5ad" />
+<assert_stdout>
+<has_text_matching expression="sc.pp.log1p"/>
+</assert_stdout>
+<output name="anndata_out_h5ad" file="pp.log1p.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+</test>
+<test><!-- pp.scale on krumsiek11 with zero centering and max_value left unset; checks stdout text and the h5ad output by size -->
+<conditional name="input">
+<param name="format" value="h5ad" />
+<param name="adata" value="krumsiek11.h5ad" />
+</conditional>
+<conditional name="method">
+<param name="method" value="pp.scale"/>
+<param name="zero_center" value="true"/>
+</conditional>
+<param name="anndata_output_format" value="h5ad" />
+<assert_stdout>
+<has_text_matching expression="sc.pp.scale"/>
+<has_text_matching expression="zero_center=True"/>
+</assert_stdout>
+<output name="anndata_out_h5ad" file="pp.scale.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+</test>
+<test><!-- pp.scale on krumsiek11 with zero centering and max_value=10; additionally checks that max_value appears in stdout -->
+<conditional name="input">
+<param name="format" value="h5ad" />
+<param name="adata" value="krumsiek11.h5ad" />
+</conditional>
+<conditional name="method">
+<param name="method" value="pp.scale"/>
+<param name="zero_center" value="true"/>
+<param name="max_value" value="10"/>
+</conditional>
+<param name="anndata_output_format" value="h5ad" />
+<assert_stdout>
+<has_text_matching expression="sc.pp.scale"/>
+<has_text_matching expression="zero_center=True"/>
+<has_text_matching expression="max_value=10.0"/>
+</assert_stdout>
+<output name="anndata_out_h5ad" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+</test>
+<test><!-- pp.sqrt on krumsiek11 (no parameters); checks stdout text and the h5ad output by size -->
+<conditional name="input">
+<param name="format" value="h5ad" />
+<param name="adata" value="krumsiek11.h5ad" />
+</conditional>
+<conditional name="method">
+<param name="method" value="pp.sqrt"/>
+</conditional>
+<param name="anndata_output_format" value="h5ad" />
+<assert_stdout>
+<has_text_matching expression="sc.pp.sqrt"/>
+</assert_stdout>
+<output name="anndata_out_h5ad" file="pp.sqrt.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+</test>
+<test><!-- pp.downsample_counts on random-randint with target_counts=20000 and seed 0; no reference file, the output is only checked for the X, obs and var HDF5 keys -->
+<conditional name="input">
+<param name="format" value="h5ad" />
+<param name="adata" value="random-randint.h5ad" />
+</conditional>
+<conditional name="method">
+<param name="method" value="pp.downsample_counts"/>
+<param name="target_counts" value="20000"/>
+<param name="random_state" value="0"/>
+</conditional>
+<param name="anndata_output_format" value="h5ad" />
+<assert_stdout>
+<has_text_matching expression="sc.pp.downsample_counts"/>
+<has_text_matching expression="target_counts=20000"/>
+<has_text_matching expression="random_state=0"/>
+</assert_stdout>
+<output name="anndata_out_h5ad" ftype="h5">
+<assert_contents>
+<has_h5_keys keys="X, obs, var" />
+</assert_contents>
+</output>
+</test>
+</tests>
+<help><![CDATA[
+Normalize total counts per cell (`pp.normalize_per_cell`)
+=========================================================
+Normalize each cell by total counts over all genes, so that every cell has
+the same total count after normalization.
+Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.normalize_per_cell.html>`__
+Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
+==================================================================================================================
+Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
+The recipe runs the following steps:
+- only consider genes with more than 1 count
+- normalize with total UMI count per cell
+- select highly-variable genes
+- subset the genes
+- renormalize after filtering
+- log transform (if needed)
+- scale to unit variance and shift to zero mean
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_zheng17.html>`__
+Normalization and filtering as of Weinreb et al. (2017) (`pp.recipe_weinreb17`)
+===============================================================================
+Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_weinreb17.html>`__
+Normalization and filtering as of Seurat (Satija et al., 2015) (`pp.recipe_seurat`)
+===================================================================================
+This uses a particular preprocessing.
+Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_seurat.html>`__
+Logarithmize the data matrix (`pp.log1p`)
+=========================================
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.log1p.html>`__
+Scale data to unit variance and zero mean (`pp.scale`)
+======================================================
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.scale.html>`__
+Compute the square root of the data matrix (`pp.sqrt`)
+======================================================
+`X = sqrt(X)`
+Downsample counts (`pp.downsample_counts`)
+==========================================
+Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This
+has been implemented by M. D. Luecken.
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.downsample_counts.html>`__
+]]></help>
+<expand macro="citations"/>
+</tool>

Mercurial > repos > iuc > scanpy_normalize

comparison normalize.xml @ 0:ed64c90a9b93 draft