view inspect.xml @ 0:5d2e17328afe draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author iuc
date Mon, 04 Mar 2019 10:15:38 -0500
parents
children a755eaa1cc32
line wrap: on
line source

<tool id="scanpy_inspect" name="Inspect with scanpy" version="@galaxy_version@">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == "tl.paga"
sc.tl.paga(
    adata=adata,
    groups='$method.groups',
    use_rna_velocity =$method.use_rna_velocity,
    model='$method.model',
    copy=False)
#elif $method.method == "tl.dpt"
sc.tl.dpt(
    adata=adata,
    n_dcs=$method.n_dcs,
    n_branchings=$method.n_branchings,
    min_group_size=$method.min_group_size,
    allow_kendall_tau_shift=$method.allow_kendall_tau_shift,
    copy=False)
adata.obs.to_csv('$obs', sep='\t')
#end if

@CMD_anndata_write_outputs@
]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="inputs_anndata"/>
        <conditional name="method">
            <param argument="method" type="select" label="Method used for plotting">
                <!--<option value="tl.paga_compare_paths">, using `tl.paga_compare_paths`</option>!-->
                <!--<option value="tl.paga_degrees">, using `tl.paga_degrees`</option>!-->
                <!--<option value="tl.paga_expression_entropies">, using `tl.paga_expression_entropies`</option>!-->
                <option value="tl.paga">Generate cellular maps of differentiation manifolds with complex topologies, using `tl.paga`</option>
                <option value="tl.dpt">Infer progression of cells through geodesic distance along the graph, using `tl.dpt`</option>
            </param>
            <when value="tl.paga">
                <param argument="groups" type="text" value="louvain" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations (`adata.obs`)."/>
                <param argument="use_rna_velocity" type="boolean" truevalue="False" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that `adata.uns` contains a directed single-cell graph with key `['velocyto_transitions']`. This feature might be subject to change in the future."/>
                <param argument="model" type="select" label="PAGA connectivity model" help="">
                    <option value="v1.2">v1.2</option>
                    <option value="v1.0">v1.0</option>
                </param>
            </when>
            <when value="tl.dpt">
                <param argument="n_dcs" type="integer" min="0" value="10" label="Number of diffusion components to use" help=""/>
                <param argument="n_branchings" type="integer" min="0" value="0" label="Number of branchings to detect" help=""/>
                <param argument="min_group_size" type="float" min="0" value="0.01" label="Min group size" help="During recursive splitting of branches ('dpt groups') for `n_branchings` &gt; 1, do not consider groups that contain less than `min_group_size` data points. If a float, `min_group_size` refers to a fraction of the total number of data points."/>
                <param argument="allow_kendall_tau_shift" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Allow Kendal tau shift?" help="If a very small branch is detected upon splitting, shift away from maximum correlation in Kendall tau criterion of Haghverdi et al (2016) to stabilize the splitting."/>
            </when>
        </conditional>
        <expand macro="anndata_output_format"/>
    </inputs>
    <outputs>
        <expand macro="anndata_outputs"/>
        <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Observations annotation">
            <filter>method['method'] == 'tl.dpt'</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="tl.paga"/>
                <param name="groups" value="paul15_clusters"/>
                <param name="use_rna_velocity" value="False"/>
                <param name="model" value="v1.2"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad" />
            <assert_stdout>
                <has_text_matching expression="sc.tl.paga"/>
                <has_text_matching expression="groups='paul15_clusters'"/>
                <has_text_matching expression="use_rna_velocity =False"/>
                <has_text_matching expression="model='v1.2'"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="tl.paga.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size">
                <assert_contents>
                    <has_h5_keys keys="X, obs, obsm, uns, var" />
                </assert_contents>
            </output>
        </test>
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="tl.dpt"/>
                <param name="n_dcs" value="15"/>
                <param name="n_branchings" value="1"/>
                <param name="min_group_size" value="0.01"/>
                <param name="allow_kendall_tau_shift" value="True"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad" />
            <assert_stdout>
                <has_text_matching expression="sc.tl.dpt"/>
                <has_text_matching expression="n_dcs=15"/>
                <has_text_matching expression="n_branchings=1"/>
                <has_text_matching expression="min_group_size=0.01"/>
                <has_text_matching expression="allow_kendall_tau_shift=True"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size">
                <assert_contents>
                    <has_h5_keys keys="X, obs, obsm, uns, var" />
                </assert_contents>
            </output>
            <output name="obs" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.obs.tabular" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
Generate cellular maps of differentiation manifolds with complex topologies (`tl.paga`)
=======================================================================================

By quantifying the connectivity of partitions (groups, clusters) of the
single-cell graph, partition-based graph abstraction (PAGA) generates a much
simpler abstracted graph (*PAGA graph*) of partitions, in which edge weights
represent confidence in the presence of connections. By tresholding this
confidence in `paga`, a much simpler representation of data
can be obtained.

The confidence can be interpreted as the ratio of the actual versus the
expected value of connetions under the null model of randomly connecting
partitions. We do not provide a p-value as this null model does not
precisely capture what one would consider "connected" in real data, hence it
strongly overestimates the expected value. See an extensive discussion of
this in Wolf et al (2017).

Together with a random walk-based distance measure, this generates a partial
coordinatization of data useful for exploring and explaining its variation.

More details on the `tl.paga scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.paga.html#scanpy.api.tl.paga>`_


Infer progression of cells through geodesic distance along the graph (`tl.dpt`)
===============================================================================

Reconstruct the progression of a biological process from snapshot
data. `Diffusion Pseudotime` has been introduced by Haghverdi et al (2016) and
implemented within Scanpy (Wolf et al, 2017). Here, we use a further developed
version, which is able to deal with disconnected graphs (Wolf et al, 2017) and can
be run in a `hierarchical` mode by setting the parameter
`n_branchings>1`. We recommend, however, to only use
`tl.dpt` for computing pseudotime (`n_branchings=0`) and
to detect branchings via `paga`. For pseudotime, you need
to annotate your data with a root cell. 

This requires to run `pp.neighbors`, first. In order to
reproduce the original implementation of DPT, use `method=='gauss'` in
this. Using the default `method=='umap'` only leads to minor quantitative
differences, though.


If `n_branchings==0`, no field `dpt_groups` will be written.

- dpt_pseudotime : Array of dim (number of samples) that stores the pseudotime of each cell, that is, the DPT distance with respect to the root cell.
- dpt_groups : Array of dim (number of samples) that stores the subgroup id ('0','1', ...) for each cell. The groups  typically correspond to 'progenitor cells', 'undecided cells' or 'branches' of a process.

The tool is similar to the R package `destiny` of Angerer et al (2016).

More details on the `tl.dpt scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.dpt.html#scanpy.api.tl.dpt>`_

    ]]></help>
    <expand macro="citations"/>
</tool>