view normalize.xml @ 0:ed64c90a9b93 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author iuc
date Mon, 04 Mar 2019 10:16:12 -0500
parents
children a9f14e2d1655
line wrap: on
line source

<tool id="scanpy_normalize" name="Normalize with scanpy" version="@galaxy_version@">
    <!-- No short description is provided for this tool. -->
    <description></description>
    <!-- Shared macros are imported from macros.xml; the "requirements",
         "version_command" and other expanded macros used in this file are not
         defined here (presumably they live in macros.xml, as do the @...@ tokens). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <!-- The job fails when the command exits with a non-zero code. The command
         line itself comes entirely from the CMD token, which is defined outside
         this file. -->
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <configfiles>
        <!--
            Cheetah template that is rendered into the script file "script_file".
            Exactly one scanpy preprocessing call is emitted, chosen by the value of
            the "method" select. The imports, the loading of "adata" and the writing
            of the outputs come from tokens defined outside this file (presumably
            macros.xml).

            Optional numeric parameters (counts_per_cell_after, max_value) are tested
            with str(...) != '' rather than by truthiness, so that an explicit value
            of 0 is passed to scanpy instead of being silently treated as "not set".

            For pp.normalize_per_cell the observation annotations (adata.obs) are
            additionally written as a tab-separated file to the anndata_obs output.
        -->
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == "pp.normalize_per_cell"
sc.pp.normalize_per_cell(
    data=adata,
    #if str($method.counts_per_cell_after) != ''
    counts_per_cell_after=$method.counts_per_cell_after,
    #end if
    #if $method.counts_per_cell
    counts_per_cell=np.loadtxt('$method.counts_per_cell'),
    #end if
    key_n_counts='$method.key_n_counts',
    copy=False)
adata.obs.to_csv('$anndata_obs', sep='\t')
#elif $method.method == "pp.recipe_zheng17"
sc.pp.recipe_zheng17(
    adata=adata,
    n_top_genes=$method.n_top_genes,
    log=$method.log,
    plot=False,
    copy=False)
#elif $method.method == "pp.recipe_weinreb17"
sc.pp.recipe_weinreb17(
    adata=adata,
    log=$method.log,
    mean_threshold=$method.mean_threshold,
    cv_threshold=$method.cv_threshold,
    n_pcs=$method.n_pcs,
    svd_solver='$method.svd_solver',
    random_state=$method.random_state,
    copy=False)
#elif $method.method == "pp.recipe_seurat"
sc.pp.recipe_seurat(
    adata=adata,
    log=$method.log,
    plot=False,
    copy=False)
#elif $method.method == "pp.log1p"
sc.pp.log1p(
    data=adata,
    copy=False)
#elif $method.method == "pp.scale"
sc.pp.scale(
    data=adata,
    zero_center=$method.zero_center,
    #if str($method.max_value) != ''
    max_value=$method.max_value,
    #end if
    copy=False)
#elif $method.method == "pp.sqrt"
sc.pp.sqrt(
    data=adata,
    copy=False)
#elif $method.method == "pp.downsample_counts"
sc.pp.downsample_counts(
    adata=adata,
    target_counts=$method.target_counts,
    random_state=$method.random_state,
    copy=False)
#end if

@CMD_anndata_write_outputs@

]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="inputs_anndata"/>
        <!-- Selects the scanpy preprocessing function to run; each "when" branch
             exposes the parameters that the script template passes to that function.
             Option values are the function names tested by the script template and
             must not be changed independently of it. -->
        <conditional name="method">
            <param argument="method" type="select" label="Method used for normalization">
                <option value="pp.normalize_per_cell">Normalize total counts per cell, using `pp.normalize_per_cell`</option>
                <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using `pp.recipe_zheng17`</option>
                <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al. (2017), using `pp.recipe_weinreb17`</option>
                <option value="pp.recipe_seurat">Normalization and filtering as of Seurat (Satija et al., 2015), using `pp.recipe_seurat`</option>
                <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option>
                <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option>
                <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option>
                <option value="pp.downsample_counts">Downsample counts, using `pp.downsample_counts`</option>
            </param>
            <when value="pp.normalize_per_cell">
                <!-- Both of the following are optional; when left empty they are
                     omitted from the generated call. -->
                <param argument="counts_per_cell_after" type="float" value="" optional="true" label="Counts per cell after" help="If not provided, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization."/>
                <param argument="counts_per_cell" type="data" format="tabular,txt" optional="true" label="Precomputed counts per cell" help=""/>
                <param argument="key_n_counts" type="text" value="n_counts" label="Name of the field in `adata.obs` where the total counts per cell will be stored" help=""/>
            </when>
            <when value="pp.recipe_zheng17">
                <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/>
                <expand macro="param_log"/>
            </when>
            <when value="pp.recipe_weinreb17">
                <expand macro="param_log"/>
                <param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/>
                <param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/>
                <param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal components" help=""/>
                <expand macro="svd_solver"/>
                <expand macro="pca_random_state"/>
            </when>
            <when value="pp.recipe_seurat">
                <expand macro="param_log"/>
            </when>
            <when value="pp.log1p"/>
            <when value="pp.scale">
                <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
                    label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
                <!-- Optional; when left empty no clipping argument is passed. -->
                <param argument="max_value" type="float" value="" optional="true" label="Maximum value"
                    help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
            </when>
            <when value="pp.sqrt"/>
            <when value="pp.downsample_counts">
                <param argument="target_counts" type="integer" min="0" value="20000"
                    label="Target number of counts for downsampling" help="Cells with more counts than 'target_counts' will be downsampled to have 'target_counts' counts."/>
                <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/>
            </when>
        </conditional>
        <expand macro="anndata_output_format"/>
    </inputs>
    <outputs>
        <expand macro="anndata_outputs"/>
        <!-- Tab-separated dump of adata.obs. It is written by the script template
             only in the pp.normalize_per_cell branch, hence the filter below. -->
        <data name="anndata_obs" format="tabular" label="${tool.name} on ${on_string}: Annotation of observations">
            <filter>method['method'] == 'pp.normalize_per_cell'</filter>
        </data>
    </outputs>
    <!-- One test per method (two for pp.scale). Each test asserts that the expected
         call and arguments appear in stdout (presumably because the command echoes
         the generated script; the command token is defined outside this file).
         Most AnnData outputs are compared by size similarity (sim_size). -->
    <tests>
        <!-- pp.normalize_per_cell with an explicit target count and a precomputed
             counts-per-cell file; also checks the tabular obs output. -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="krumsiek11.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="pp.normalize_per_cell"/>
                <param name="counts_per_cell_after" value="2"/>
                <param name="counts_per_cell" value="krumsiek11_counts_per_cell"/>
                <param name="key_n_counts" value="n_counts"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad"/>
            <assert_stdout>
                <has_text_matching expression="sc.pp.normalize_per_cell"/>
                <has_text_matching expression="counts_per_cell_after=2.0"/>
                <has_text_matching expression="counts_per_cell=np.loadtxt"/>
                <has_text_matching expression="key_n_counts='n_counts'"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="pp.normalize_per_cell.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
            <output name="anndata_obs" file="pp.normalize_per_cell.obs.krumsiek11.tabular"/>
        </test>
        <!-- pp.recipe_zheng17 keeping 1000 genes, with log transformation. -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="random-randint.h5ad"/>
            </conditional>
            <conditional name="method">
                <param name="method" value="pp.recipe_zheng17"/>
                <param name="n_top_genes" value="1000"/>
                <param name="log" value="True"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad"/>
            <assert_stdout>
                <has_text_matching expression="sc.pp.recipe_zheng17"/>
                <has_text_matching expression="n_top_genes=1000"/>
                <has_text_matching expression="log=True"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5" compare="sim_size"/>
        </test>
        <!-- pp.recipe_weinreb17 with every parameter set explicitly. -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="paul15_subsample.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="pp.recipe_weinreb17"/>
                <param name="log" value="True"/>
                <param name="mean_threshold" value="0.01"/>
                <param name="cv_threshold" value="2.0"/>
                <param name="n_pcs" value="50"/>
                <param name="svd_solver" value="randomized"/>
                <param name="random_state" value="0"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad" />
            <assert_stdout>
                <has_text_matching expression="sc.pp.recipe_weinreb17"/>
                <has_text_matching expression="log=True"/>
                <has_text_matching expression="mean_threshold=0.01"/>
                <has_text_matching expression="cv_threshold=2.0"/>
                <has_text_matching expression="n_pcs=50"/>
                <has_text_matching expression="svd_solver='randomized'"/>
                <has_text_matching expression="random_state=0"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="pp.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/>
        </test>
        <!-- pp.recipe_seurat; the input file is the expected output of the
             pp.recipe_zheng17 test above. -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="pp.recipe_seurat"/>
                <param name="log" value="True"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad"/>
            <assert_stdout>
                <has_text_matching expression="sc.pp.recipe_seurat"/>
                <has_text_matching expression="log=True"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5" compare="sim_size"/>
        </test>
        <!-- pp.log1p (no parameters). -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="krumsiek11.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="pp.log1p"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad" />
            <assert_stdout>
                <has_text_matching expression="sc.pp.log1p"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="pp.log1p.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
        </test>
        <!-- pp.scale with zero centering and max_value left unset. -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="krumsiek11.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="pp.scale"/>
                <param name="zero_center" value="true"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad" />
            <assert_stdout>
                <has_text_matching expression="sc.pp.scale"/>
                <has_text_matching expression="zero_center=True"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="pp.scale.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
        </test>
        <!-- pp.scale with zero centering and max_value set to 10. -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="krumsiek11.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="pp.scale"/>
                <param name="zero_center" value="true"/>
                <param name="max_value" value="10"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad" />
            <assert_stdout>
                <has_text_matching expression="sc.pp.scale"/>
                <has_text_matching expression="zero_center=True"/>
                <has_text_matching expression="max_value=10.0"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
        </test>
        <!-- pp.sqrt (no parameters). -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="krumsiek11.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="pp.sqrt"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad" />
            <assert_stdout>
                <has_text_matching expression="sc.pp.sqrt"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="pp.sqrt.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
        </test>
        <!-- pp.downsample_counts; unlike the other tests the output is not compared
             to a reference file, only the presence of the X, obs and var keys is
             checked. -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="random-randint.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="pp.downsample_counts"/>
                <param name="target_counts" value="20000"/>
                <param name="random_state" value="0"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad" />
            <assert_stdout>
                <has_text_matching expression="sc.pp.downsample_counts"/>
                <has_text_matching expression="target_counts=20000"/>
                <has_text_matching expression="random_state=0"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" ftype="h5">
                <assert_contents>
                    <has_h5_keys keys="X, obs, var" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Normalize total counts per cell (`pp.normalize_per_cell`)
=========================================================

Normalize each cell by total counts over all genes, so that every cell has
the same total count after normalization.

Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.normalize_per_cell.html>`__


Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
==================================================================================================================

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

The recipe runs the following steps:

- only consider genes with more than 1 count
- normalize with total UMI count per cell
- select highly-variable genes
- subset the genes
- renormalize after filtering
- log transform (if needed)
- scale to unit variance and shift to zero mean

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_zheng17.html>`__


Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`)
==============================================================================

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_weinreb17.html>`__


Normalization and filtering as of Seurat (Satija et al., 2015) (`pp.recipe_seurat`)
===================================================================================

This uses a particular preprocessing.

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_seurat.html>`__

Logarithmize the data matrix (`pp.log1p`)
=========================================

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.log1p.html>`__

Scale data to unit variance and zero mean (`pp.scale`)
======================================================

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.scale.html>`__

Computes the square root of the data matrix (`pp.sqrt`)
=======================================================

`X = sqrt(X)`

Downsample counts (`pp.downsample_counts`)
==========================================

Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This
has been implemented by M. D. Luecken.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.downsample_counts.html>`__

    ]]></help>
    <expand macro="citations"/>
</tool>