Mercurial > repos > iuc > scanpy_filter
view filter.xml @ 6:a97abb8cd15b draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 5a90fd345b43ca12366f4475f4cfd88ef197e452"
author | iuc |
---|---|
date | Thu, 20 Feb 2020 08:26:18 -0500 |
parents | 8b9610ab366a |
children | 3c86f71498bc |
line wrap: on
line source
<tool id="scanpy_filter" name="Filter" version="@galaxy_version@" profile="@profile@">
    <!--
        Galaxy wrapper exposing several scanpy filtering / subsampling functions
        behind a single "method" conditional:
          pp.filter_cells, pp.filter_genes, tl.filter_rank_genes_groups,
          pp.highly_variable_genes, pp.subsample, pp.downsample_counts.

        All @TOKEN@ placeholders and every <expand macro="..."/> used below are
        resolved from macros.xml, which is not part of this file.
    -->
    <description>with scanpy</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
@CMD@
    ]]></command>
    <!--
        The configfile is a Cheetah template rendered into a Python script.
        Exactly one branch of the top-level #if chain is emitted, depending on
        the selected method. Optional parameters are only written into the call
        when their rendered value is not the empty string.
    -->
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == 'pp.filter_cells'
sc.pp.filter_cells(
    adata,
    #if $method.filter.filter == 'min_counts'
    min_counts=$method.filter.min_counts,
    #else if $method.filter.filter == 'max_counts'
    max_counts=$method.filter.max_counts,
    #else if $method.filter.filter == 'min_genes'
    min_genes=$method.filter.min_genes,
    #else if $method.filter.filter == 'max_genes'
    max_genes=$method.filter.max_genes,
    #end if
    copy=False)

#else if $method.method == 'pp.filter_genes'
sc.pp.filter_genes(
    adata,
    #if $method.filter.filter == 'min_counts'
    min_counts=$method.filter.min_counts,
    #else if $method.filter.filter == 'max_counts'
    max_counts=$method.filter.max_counts,
    #else if $method.filter.filter == 'min_cells'
    min_cells=$method.filter.min_cells,
    #else if $method.filter.filter == 'max_cells'
    max_cells=$method.filter.max_cells,
    #end if
    copy=False)

#else if $method.method == 'tl.filter_rank_genes_groups'
sc.tl.filter_rank_genes_groups(
    adata,
    #if str($method.key) != ''
    key='$method.key',
    #end if
    #if str($method.groupby) != ''
    groupby='$method.groupby',
    #end if
    use_raw=$method.use_raw,
    log=$method.log,
    key_added='$method.key_added',
    min_in_group_fraction=$method.min_in_group_fraction,
    max_out_group_fraction=$method.max_out_group_fraction,
    min_fold_change=$method.min_fold_change)

#else if $method.method == "pp.highly_variable_genes"
sc.pp.highly_variable_genes(
    adata=adata,
    flavor='$method.flavor.flavor',
    #if $method.flavor.flavor == 'seurat'
        #if str($method.flavor.min_mean) != ''
    min_mean=$method.flavor.min_mean,
        #end if
        #if str($method.flavor.max_mean) != ''
    max_mean=$method.flavor.max_mean,
        #end if
        #if str($method.flavor.min_disp) != ''
    min_disp=$method.flavor.min_disp,
        #end if
        #if str($method.flavor.max_disp) != ''
    max_disp=$method.flavor.max_disp,
        #end if
    #else if $method.flavor.flavor == 'cell_ranger'
    n_top_genes=$method.flavor.n_top_genes,
    #end if
    n_bins=$method.n_bins,
    subset=$method.subset,
    inplace=True)

#else if $method.method == 'pp.subsample'
sc.pp.subsample(
    data=adata,
    #if $method.type.type == 'fraction'
    fraction=$method.type.fraction,
    #else if $method.type.type == 'n_obs'
    n_obs=$method.type.n_obs,
    #end if
    random_state=$method.random_state,
    copy=False)

#else if $method.method == "pp.downsample_counts"
sc.pp.downsample_counts(
    adata=adata,
    #if str($method.counts_per_cell) != ''
    counts_per_cell=$method.counts_per_cell,
    #end if
    #if str($method.total_counts) != ''
    total_counts=$method.total_counts,
    #end if
    random_state=$method.random_state,
    replace=$method.replace,
    copy=False)
#end if

@CMD_anndata_write_outputs@
        ]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="inputs_anndata"/>
        <!-- Each <when> below mirrors one branch of the #if chain in the configfile. -->
        <conditional name="method">
            <param argument="method" type="select" label="Method used for filtering">
                <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option>
                <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option>
                <option value="tl.filter_rank_genes_groups">Filter out genes based on fold change and fraction of cells expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option>
                <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option>
                <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option>
                <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option>
            </param>
            <when value="pp.filter_cells">
                <!-- Only one threshold is passed per call; the select picks which one. -->
                <conditional name="filter">
                    <param argument="filter" type="select" label="Filter">
                        <option value="min_counts">Minimum number of counts</option>
                        <option value="max_counts">Maximum number of counts</option>
                        <option value="min_genes">Minimum number of genes expressed</option>
                        <option value="max_genes">Maximum number of genes expressed</option>
                    </param>
                    <when value="min_counts">
                        <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a cell to pass filtering" help=""/>
                    </when>
                    <when value="max_counts">
                        <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a cell to pass filtering" help=""/>
                    </when>
                    <when value="min_genes">
                        <param argument="min_genes" type="integer" min="0" value="" label="Minimum number of genes expressed required for a cell to pass filtering" help=""/>
                    </when>
                    <when value="max_genes">
                        <param argument="max_genes" type="integer" min="0" value="" label="Maximum number of genes expressed required for a cell to pass filtering" help=""/>
                    </when>
                </conditional>
            </when>
            <when value="pp.filter_genes">
                <!-- Only one threshold is passed per call; the select picks which one. -->
                <conditional name="filter">
                    <param argument="filter" type="select" label="Filter">
                        <option value="min_counts">Minimum number of counts</option>
                        <option value="max_counts">Maximum number of counts</option>
                        <option value="min_cells">Minimum number of cells expressed</option>
                        <option value="max_cells">Maximum number of cells expressed</option>
                    </param>
                    <when value="min_counts">
                        <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/>
                    </when>
                    <when value="max_counts">
                        <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/>
                    </when>
                    <when value="min_cells">
                        <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/>
                    </when>
                    <when value="max_cells">
                        <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/>
                    </when>
                </conditional>
            </when>
            <when value="tl.filter_rank_genes_groups">
                <!-- key and groupby are omitted from the call when left empty. -->
                <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored"/>
                <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider"/>
                <expand macro="param_use_raw"/>
                <expand macro="param_log"/>
                <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values"/>
                <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of cells expressing the gene within the categories"/>
                <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of cells expressing the gene outside the categories"/>
                <!-- float so that non-integer thresholds such as 1.5 can be entered; integers remain valid -->
                <param argument="min_fold_change" type="float" value="2" label="Minimum fold change"/>
            </when>
            <when value="pp.highly_variable_genes">
                <conditional name="flavor">
                    <param argument="flavor" type="select" label="Flavor for computing normalized dispersion">
                        <option value="seurat">Seurat</option>
                        <option value="cell_ranger">Cell Ranger</option>
                    </param>
                    <when value="seurat">
                        <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff"/>
                        <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff"/>
                        <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff"/>
                        <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff"/>
                    </when>
                    <when value="cell_ranger">
                        <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep"/>
                    </when>
                </conditional>
                <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/>
                <param argument="subset" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Inplace subset to highly-variable genes?" help="Otherwise it merely indicates highly variable genes."/>
            </when>
            <when value="pp.subsample">
                <conditional name="type">
                    <param name="type" type="select" label="Type of subsampling">
                        <option value="fraction">By fraction</option>
                        <option value="n_obs">By number of observation</option>
                    </param>
                    <when value="fraction">
                        <!-- A fraction is a proportion of the observations, hence the [0, 1] bounds. -->
                        <param argument="fraction" type="float" min="0" max="1" value="" label="Subsample to this 'fraction' of the number of observations"/>
                    </when>
                    <when value="n_obs">
                        <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations"/>
                    </when>
                </conditional>
                <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/>
            </when>
            <when value="pp.downsample_counts">
                <!-- Both targets are optional in the form; each is only passed to scanpy when filled in. -->
                <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than 'counts_per_cell', it will be downsampled to this number. Set either this parameter or 'total_counts', not both."/>
                <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number. Set either this parameter or 'counts_per_cell', not both."/>
                <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/>
                <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/>
            </when>
        </conditional>
        <expand macro="inputs_common_advanced"/>
    </inputs>
    <outputs>
        <expand macro="anndata_outputs"/>
    </outputs>
    <!--
        Every test enables show_log in the advanced_common section and asserts on
        the text of "hidden_output" (both presumably defined in macros.xml), then
        compares the resulting AnnData file by size.
    -->
    <tests>
        <test>
            <!-- test 0 -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.filter_cells"/>
                <conditional name="filter">
                    <param name="filter" value="min_counts"/>
                    <param name="min_counts" value="3"/>
                </conditional>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="336 × 11"/>
            </assert_stdout>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.filter_cells"/>
                    <has_text_matching expression="min_counts=3"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test>
            <!-- test 1 -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.filter_cells"/>
                <conditional name="filter">
                    <param name="filter" value="max_genes"/>
                    <param name="max_genes" value="100"/>
                </conditional>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.filter_cells"/>
                    <has_text_matching expression="adata"/>
                    <has_text_matching expression="max_genes=100"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test>
            <!-- test 2 -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.filter_genes"/>
                <conditional name="filter">
                    <param name="filter" value="min_counts"/>
                    <param name="min_counts" value="3"/>
                </conditional>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.filter_genes"/>
                    <has_text_matching expression="min_counts=3"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <!-- <test> -->
        <!--     <!-\- test 3 -\-> -->
        <!--     <!-\- Input dataset appears to be missing rank_genes_groups key... -\-> -->
        <!--     <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" /> -->
        <!--     <conditional name="method"> -->
        <!--         <param name="method" value="tl.filter_rank_genes_groups"/> -->
        <!--         <param name="key" value="rank_genes_groups"/> -->
        <!--         <param name="use_raw" value="False"/> -->
        <!--         <param name="log" value="False"/> -->
        <!--         <param name="key_added" value="rank_genes_groups_filtered"/> -->
        <!--         <param name="min_in_group_fraction" value="0.25"/> -->
        <!--         <param name="max_out_group_fraction" value="0.5"/> -->
        <!--         <param name="min_fold_change" value="3"/> -->
        <!--     </conditional> -->
        <!--     <output name="hidden_output"> -->
        <!--         <assert_contents> -->
        <!--             <has_text_matching expression="tl.filter_rank_genes_groups"/> -->
        <!--             <has_text_matching expression="key='rank_genes_groups'"/> -->
        <!--             <has_text_matching expression="use_raw=False"/> -->
        <!--             <has_text_matching expression="log=False"/> -->
        <!--             <has_text_matching expression="key_added='rank_genes_groups_filtered'"/> -->
        <!--             <has_text_matching expression="min_in_group_fraction=0.25"/> -->
        <!--             <has_text_matching expression="max_out_group_fraction=0.5"/> -->
        <!--             <has_text_matching expression="min_fold_change=3"/> -->
        <!--         </assert_contents> -->
        <!--     </output> -->
        <!--     <output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/> -->
        <!-- </test> -->
        <test>
            <!-- test 4 -->
            <param name="adata" value="blobs.h5ad"/>
            <conditional name="method">
                <param name="method" value="pp.highly_variable_genes"/>
                <conditional name="flavor">
                    <param name="flavor" value="seurat"/>
                    <param name="min_mean" value="0.0125"/>
                    <param name="max_mean" value="3"/>
                    <param name="min_disp" value="0.5"/>
                </conditional>
                <param name="n_bins" value="20"/>
                <param name="subset" value="false"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.highly_variable_genes"/>
                    <has_text_matching expression="flavor='seurat'"/>
                    <has_text_matching expression="min_mean=0.0125"/>
                    <has_text_matching expression="max_mean=3"/>
                    <has_text_matching expression="min_disp=0.5"/>
                    <has_text_matching expression="n_bins=20"/>
                    <has_text_matching expression="subset=False"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test>
            <!-- test 5 -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.highly_variable_genes"/>
                <conditional name="flavor">
                    <param name="flavor" value="cell_ranger"/>
                    <param name="n_top_genes" value="2"/>
                </conditional>
                <param name="n_bins" value="20"/>
                <param name="subset" value="true"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.highly_variable_genes"/>
                    <has_text_matching expression="flavor='cell_ranger'"/>
                    <has_text_matching expression="n_top_genes=2"/>
                    <has_text_matching expression="n_bins=20"/>
                    <has_text_matching expression="subset=True"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test>
            <!-- test 6 -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.subsample"/>
                <conditional name="type">
                    <param name="type" value="fraction" />
                    <param name="fraction" value="0.5"/>
                </conditional>
                <param name="random_state" value="0"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.subsample"/>
                    <has_text_matching expression="fraction=0.5"/>
                    <has_text_matching expression="random_state=0"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test>
            <!-- test 7 -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.subsample"/>
                <conditional name="type">
                    <param name="type" value="n_obs" />
                    <param name="n_obs" value="10"/>
                </conditional>
                <param name="random_state" value="0"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.subsample"/>
                    <has_text_matching expression="n_obs=10"/>
                    <has_text_matching expression="random_state=0"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test>
            <!-- test 8 -->
            <param name="adata" value="random-randint.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.downsample_counts"/>
                <param name="total_counts" value="20000"/>
                <param name="random_state" value="0"/>
                <param name="replace" value="false"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.downsample_counts"/>
                    <has_text_matching expression="total_counts=20000"/>
                    <has_text_matching expression="random_state=0"/>
                    <has_text_matching expression="replace=False"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`)
========================================================================================

For instance, only keep cells with at least `min_counts` counts or
`min_genes` genes expressed. This is to filter measurement outliers, i.e.,
"unreliable" observations.

Only provide one of the optional parameters `min_counts`, `min_genes`,
`max_counts`, `max_genes` per call.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.filter_cells.html>`__

Filter genes based on number of cells or counts (`pp.filter_genes`)
===================================================================

Keep genes that have at least `min_counts` counts or are expressed in at
least `min_cells` cells or have at most `max_counts` counts or are expressed
in at most `max_cells` cells.

Only provide one of the optional parameters `min_counts`, `min_cells`,
`max_counts`, `max_cells` per call.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.filter_genes.html>`__

Filter out genes based on fold change and fraction of cells expressing the gene within and outside the groupby categories (`tl.filter_rank_genes_groups`)
==========================================================================================================================================================

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.filter_rank_genes_groups.html>`__

Annotate highly variable genes (`pp.highly_variable_genes`)
===========================================================

It expects logarithmized data.

Depending on flavor, this reproduces the R-implementations of Seurat or Cell Ranger. The normalized dispersion is obtained
by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression
of genes. This means that for each bin of mean expression, highly variable genes are selected.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.highly_variable_genes.html>`__

Subsample to a fraction of the number of observations (`pp.subsample`)
======================================================================

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.subsample.html>`__

Downsample counts (`pp.downsample_counts`)
==========================================

Downsample counts so that each cell has no more than `counts_per_cell` counts, or so
that the whole count matrix has no more than `total_counts` counts. Cells (or matrices)
with fewer counts than the chosen target are unaffected by this.

Provide exactly one of `counts_per_cell` and `total_counts` per call.

This has been implemented by M. D. Luecken.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.downsample_counts.html>`__
    ]]></help>
    <expand macro="citations"/>
</tool>