Mercurial > repos > iuc > scanpy_remove_confounders

<tool id="scanpy_remove_confounders" name="Remove confounders" version="@galaxy_version@" profile="@profile@">
    <description>with scanpy</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == "pp.regress_out"
sc.pp.regress_out(
   adata=adata,
   #set $keys = [str(x.strip()) for x in str($method.keys).split(',')]
   keys=$keys,
   copy=False)

#else if $method.method == "pp.mnn_correct"
    #for i, filepath in enumerate($methods.extra_adata)
adata_$i = ad.read('$filepath')
    #end for

sc.pp.mnn_correct(
    adata,
    #for i, filepath in enumerate($methods.extra_adata)
    adata_$i,
    #end for
    #if str($methods.var_subset) != ''
    #set $var_subset=([x.strip() for x in str($method.var_subset).split(',')])
    var_subset=$var_subset,
    #end if
    batch_key='$method.batch_key',
    index_unique='$method.index_unique'
    #if str($methods.batch_categories) != ''
    #set $batch_categories=([x.strip() for x in str($method.batch_categories).split(',')])
    batch_categories=$batch_categories,
    #end if
    k=$method.k,
    sigma=$method.sigma,
    cos_norm_in=$method.cos_norm_in,
    cos_norm_out=$method.cos_norm_out,
    svd_dim=$method.svd_dim,
    var_adj=$method.var_adj,
    compute_angle=$method.compute_angle,
    mnn_order='$method.mnn_order',
    svd_mode='$method.svd_mode',
    do_concatenate=True,
    save_raw=True,
    n_jobs=\${GALAXY_SLOTS:-4})

#else if $method.method == "pp.combat"
sc.pp.combat(
    adata,
    key='$method.key',
    inplace=True)

#end if

@CMD_anndata_write_outputs@
]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="inputs_anndata"/>
        <conditional name="method">
            <param argument="method" type="select" label="Method used for plotting">
                <option value="pp.regress_out">Regress out unwanted sources of variation, using 'pp.regress_out'</option>
                <option value="pp.mnn_correct">Correct batch effects by matching mutual nearest neighbors, using 'pp.mnn_correct'</option>
                <option value="pp.combat">Correct batch effects with ComBat function, using 'pp.combat'</option>
            </param>
            <when value="pp.regress_out">
                <param argument="keys" type="text" value="" label="Keys for observation annotation on which to regress on" help="Keys separated by a comma"/>
            </when>
            <when value="pp.mnn_correct">
                <param name="extra_adata" type="data" multiple="true" optional="true" format="h5ad" label="Extra annotated data matrix" help="They should have same number of variables."/>
                <param argument="var_subset" type="text" value="" optional="true" label="The subset of vars to be used when performing MNN correction" help="List of comma-separated key from '.var_names'. If not set, all vars are used"/>
                <param argument="batch_key" type="text" value="batch" label="Batch key for the concatenate"/>
                <param name="index_unique" type="select" label="Separator to join the existing index names with the batch category" help="Leave it empty to keep existing indices">
                    <option value="-">-</option>
                    <option value="_">_</option>
                    <option value=" "> </option>
                    <option value="/">/</option>
                </param>
                <param argument="batch_categories" type="text" value="" optional="true" label="Batch categories for the concatenate" help="List of comma-separated key"/>
                <param argument="k" type="integer" value="20" label="Number of mutual nearest neighbors"/>
                <param argument="sigma" type="float" value="1" label="The bandwidth of the Gaussian smoothing kernel used to compute the correction vectors"/>
                <param argument="cos_norm_in" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed on the input data prior to calculating distances between cells?"/>
                <param argument="cos_norm_out" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed prior to computing corrected expression values?"/>
                <param argument="svd_dim" type="integer" value="" optional="true" label="Number of dimensions to use for summarizing biological substructure within each batch" help="If not set, biological components will not be removed from the correction vectors."/>
                <param argument="var_adj" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Adjust variance of the correction vectors?" help="This step takes most computing time."/>
                <param argument="compute_angle" type="boolean" truevalue="True" falsevalue="False" checked="false" label="compute the angle between each cell’s correction vector and the biological subspace of the reference batch?"/>
                <param argument="mnn_order" type="text" value="" optional="true" label="The order in which batches are to be corrected" help="List of comma-separated key. If not set, datas are corrected sequentially"/>
                <param name="svd_mode" type="select" label="SVD mode">
                    <option value="svd">svd: SVD using a non-randomized SVD-via-ID algorithm</option>
                    <option value="rsvd" selected="true">rsvd: SVD using a randomized SVD-via-ID algorithm</option>
                    <option value="irlb">irlb: truncated SVD by implicitly restarted Lanczos bidiagonalization</option>
                </param>
            </when>
            <when value="pp.combat">
                <param argument="key" type="text" value="batch" label="Key to a categorical annotation from adata.obs that will be used for batch effect removal"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <expand macro="anndata_outputs"/>
    </outputs>
    <tests>
        <test>
            <!-- test 1 -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.regress_out"/>
                <param name="keys" value="cell_type"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="sc.pp.regress_out"/>
                <has_text_matching expression="keys=\['cell_type'\]"/>
            </assert_stdout>
            <output name="anndata_out" file="pp.regress_out.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <!--<test>
            < test 2 >
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.mnn_correct"/>
                <param name="reg_keys" value="cell_type"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="sc.pp.mnn_correct"/>
                <has_text_matching expression="keys='cell_type'"/>
            </assert_stdout>
            <output name="anndata_out" file="pp.mnn_correct.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>-->
        <test>
            <!-- test 2 -->
            <param name="adata" value="blobs.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.combat"/>
                <param name="key" value="blobs"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="sc.pp.combat"/>
                <has_text_matching expression="key='blobs'"/>
            </assert_stdout>
            <output name="anndata_out" file="pp.combat.blobs.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
Regress out unwanted sources of variation, using `pp.regress_out`
=================================================================

Regress out unwanted sources of variation, using simple linear regression. This is
inspired by Seurat's `regressOut` function in R.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.regress_out.html>`__

Correct batch effects by matching mutual nearest neighbors, using `pp.mnn_correct`
==================================================================================

This uses the implementation of mnnpy. Depending on do_concatenate, it returns AnnData objects in the
original order containing corrected expression values or a concatenated matrix or AnnData object.

Be reminded that it is not advised to use the corrected data matrices for differential expression testing.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.api.pp.mnn_correct.html>`__


Correct batch effects with ComBat function (`pp.combat`)
========================================================

Corrects for batch effects by fitting linear models, gains statistical power via an EB framework where information is borrowed across genes. This uses the implementation of ComBat

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.combat.html>`__


    ]]></help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Thu, 12 Dec 2019 09:26:22 -0500
parents	94c8f42efc47
children	524aa62a6066