scanpy_filter: filter.xml comparison

comparison filter.xml @ 1:6a76b60e05f5 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"

author	iuc
date	Wed, 16 Oct 2019 06:32:33 -0400
parents	6ea5a05a260a
children	e62673c32a5d

comparison

equal deleted inserted replaced

-:6ea5a05a260a
+:6a76b60e05f5
-<tool id="scanpy_filter" name="Filter with scanpy" version="@galaxy_version@">
+<tool id="scanpy_filter" name="Filter" version="@galaxy_version@">
-<description></description>
+<description>with scanpy</description>
 <macros>
 <import>macros.xml</import>
 </macros>
 <expand macro="requirements"/>
 <expand macro="version_command"/>
 <configfile name="script_file"><![CDATA[
 @CMD_imports@
 @CMD_read_inputs@
 #if $method.method == 'pp.filter_cells'
-res = sc.pp.filter_cells(
+sc.pp.filter_cells(
-#if $modify_anndata.modify_anndata == 'true'
 adata,
-#else
-adata.X,
-#end if
 #if $method.filter.filter == 'min_counts'
 min_counts=$method.filter.min_counts,
-#elif $method.filter.filter == 'max_counts'
+#else if $method.filter.filter == 'max_counts'
 max_counts=$method.filter.max_counts,
-#elif $method.filter.filter == 'min_genes'
+#else if $method.filter.filter == 'min_genes'
 min_genes=$method.filter.min_genes,
-#elif $method.filter.filter == 'max_genes'
+#else if $method.filter.filter == 'max_genes'
 max_genes=$method.filter.max_genes,
 #end if
 copy=False)
-#if $modify_anndata.modify_anndata == 'true'
+#else if $method.method == 'pp.filter_genes'
-df = adata.obs
+sc.pp.filter_genes(
-#else
-df = pd.DataFrame(data=dict(cell_subset=res[0], number_per_cell=res[1]))
-#end if
-#if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts'
-df.to_csv('$counts_per_cell', sep='\t')
-#elif $method.filter.filter == 'min_genes' or $method.filter.filter == 'max_genes'
-df.to_csv('$genes_per_cell', sep='\t')
-#end if
-#elif $method.method == 'pp.filter_genes'
-res = sc.pp.filter_genes(
-#if $modify_anndata.modify_anndata == 'true'
 adata,
-#else
-adata.X,
-#end if
 #if $method.filter.filter == 'min_counts'
 min_counts=$method.filter.min_counts,
-#elif $method.filter.filter == 'max_counts'
+#else if $method.filter.filter == 'max_counts'
 max_counts=$method.filter.max_counts,
-#elif $method.filter.filter == 'min_cells'
+#else if $method.filter.filter == 'min_cells'
 min_cells=$method.filter.min_cells,
-#elif $method.filter.filter == 'max_cells'
+#else if $method.filter.filter == 'max_cells'
 max_cells=$method.filter.max_cells,
 #end if
 copy=False)
-#if $modify_anndata.modify_anndata == 'true'
+#else if $method.method == 'tl.filter_rank_genes_groups'
-df = adata.var
+sc.tl.filter_rank_genes_groups(
-#else
-df = pd.DataFrame(data=dict(gene_subset=res[0], number_per_gene=res[1]))
-#end if
-#if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts'
-df.to_csv('$counts_per_gene', sep='\t')
-#elif $method.filter.filter == 'min_cells' or $method.filter.filter == 'max_cells'
-df.to_csv('$cells_per_gene', sep='\t')
-#end if
-#elif $method.method == 'pp.filter_genes_dispersion'
-res = sc.pp.filter_genes_dispersion(
-#if $modify_anndata.modify_anndata == 'true'
 adata,
-#else
+#if str($method.key) != ''
-adata.X,
+key='$method.key',
 #end if
+#if str($method.groupby) != ''
+groupby='$method.groupby',
+#end if
+use_raw=$method.use_raw,
+log=$method.log,
+key_added='$method.key_added',
+min_in_group_fraction=$method.min_in_group_fraction,
+max_out_group_fraction=$method.max_out_group_fraction,
+min_fold_change=$method.min_fold_change)
+#else if $method.method == "pp.highly_variable_genes"
+sc.pp.highly_variable_genes(
+adata=adata,
 flavor='$method.flavor.flavor',
-#if $method.flavor.flavor=='seurat'
+#if $method.flavor.flavor == 'seurat'
+#if str($method.flavor.min_mean) != ''
 min_mean=$method.flavor.min_mean,
+#end if
+#if str($method.flavor.max_mean) != ''
 max_mean=$method.flavor.max_mean,
+#end if
+#if str($method.flavor.min_disp) != ''
 min_disp=$method.flavor.min_disp,
-#if $method.flavor.max_disp
+#end if
+#if str($method.flavor.max_disp) != ''
 max_disp=$method.flavor.max_disp,
 #end if
-#else
+#else if $method.flavor.flavor == 'cell_ranger'
 n_top_genes=$method.flavor.n_top_genes,
 #end if
 n_bins=$method.n_bins,
-log=$method.log,
+subset=$method.subset,
-copy=False)
+inplace=True)
-#if $modify_anndata.modify_anndata == 'true'
+#else if $method.method == 'pp.subsample'
-adata.var.to_csv('$per_gene', sep='\t')
-#else
-pd.DataFrame(res).to_csv('$per_gene', sep='\t')
-#end if
-#elif $method.method == 'pp.subsample'
 sc.pp.subsample(
 data=adata,
 #if $method.type.type == 'fraction'
 fraction=$method.type.fraction,
 #else if $method.type.type == 'n_obs'
 n_obs=$method.type.n_obs,
 #end if
 random_state=$method.random_state,
 copy=False)
+#else if $method.method == "pp.downsample_counts"
+sc.pp.downsample_counts(
+adata=adata,
+#if str($method.counts_per_cell) != ''
+counts_per_cell=$method.counts_per_cell,
+#end if
+#if str($method.total_counts) != ''
+total_counts=$method.total_counts,
+#end if
+random_state=$method.random_state,
+replace=$method.replace,
+copy=False)
 #end if
-@CMD_anndata_write_modify_outputs@
+@CMD_anndata_write_outputs@
 ]]></configfile>
 </configfiles>
 <inputs>
 <expand macro="inputs_anndata"/>
 <conditional name="method">
 <param argument="method" type="select" label="Method used for filtering">
 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`</option>
 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using `pp.filter_genes`</option>
-<option value="pp.filter_genes_dispersion">Extract highly variable genes, using `pp.filter_genes_dispersion`</option>
+<option value="tl.filter_rank_genes_groups">Filter out genes based on fold change and fraction of cells expressing the gene within and outside the groupby categories, using `tl.filter_rank_genes_groups`</option>
-<!--<option value="pp.highly_variable_genes">, using `tl.highly_variable_genes`</option>!-->
+<option value="pp.highly_variable_genes">Annotate (and optionally subset to) highly variable genes, using `pp.highly_variable_genes`</option>
 <option value="pp.subsample">Subsample to a fraction of the number of observations, using `pp.subsample`</option>
-<!--<option value="queries.gene_coordinates">, using `queries.gene_coordinates`</option>!-->
+<option value="pp.downsample_counts">Downsample counts from count matrix, using `pp.downsample_counts`</option>
-<!--<option value="queries.mitochondrial_genes">, using `queries.mitochondrial_genes`</option>!-->
 </param>
 <when value="pp.filter_cells">
 <conditional name="filter">
 <param argument="filter" type="select" label="Filter">
 <option value="min_counts">Minimum number of counts</option>
 <option value="max_counts">Maximum number of counts</option>
 <option value="min_cells">Minimum number of cells expressed</option>
 <option value="max_cells">Maximum number of cells expressed</option>
 </param>
 <when value="min_counts">
-<param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering" help=""/>
+<param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/>
 </when>
 <when value="max_counts">
-<param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering" help=""/>
+<param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/>
 </when>
 <when value="min_cells">
-<param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering" help=""/>
+<param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/>
 </when>
 <when value="max_cells">
-<param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering" help=""/>
+<param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/>
 </when>
 </conditional>
 </when>
-<when value="pp.filter_genes_dispersion">
+<when value="tl.filter_rank_genes_groups">
+<param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored"/>
+<param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider"/>
+<expand macro="param_use_raw"/>
+<expand macro="param_log"/>
+<param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values"/>
+<param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of cells expressing the gene within the categories"/>
+<param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of cells expressing the gene outside the categories"/>
+<param argument="min_fold_change" type="integer" value="2" label="Minimum fold change"/>
+</when>
+<when value="pp.highly_variable_genes">
 <conditional name='flavor'>
-<param argument="flavor" type="select" label="Flavor for computing normalized dispersion" help="">
+<param argument="flavor" type="select" label="Flavor for computing normalized dispersion">
 <option value="seurat">seurat: expects non-logarithmized data</option>
 <option value="cell_ranger">cell_ranger: usually called for logarithmized data</option>
 </param>
 <when value="seurat">
-<param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff" help=""/>
+<param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff"/>
-<param argument="max_mean" type="float" value="3" label="Maximal mean cutoff" help=""/>
+<param argument="max_mean" type="float" value="3" label="Maximal mean cutoff"/>
-<param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff" help=""/>
+<param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff"/>
-<param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff" help=""/>
+<param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff"/>
 </when>
 <when value="cell_ranger">
-<param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep" help=""/>
+<param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep"/>
 </when>
 </conditional>
 <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/>
-<expand macro="param_log"/>
+<param argument="subset" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Inplace subset to highly-variable genes?" help="Otherwise it merely indicates highly variable genes."/>
 </when>
 <when value="pp.subsample">
 <conditional name="type">
 <param name="type" type="select" label="Type of subsampling">
 <option value="fraction">By fraction</option>
 <option value="n_obs">By number of observations</option>
 </param>
 <when value="fraction">
-<param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations" help=""/>
+<param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations"/>
 </when>
 <when value="n_obs">
-<param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations" help=""/>
+<param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations"/>
 </when>
 </conditional>
-<param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/>
+<param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/>
+</when>
+<when value="pp.downsample_counts">
+<param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than `counts_per_cell`, it will be downsampled to this number. Resulting counts can be specified on a per cell basis by passing an array."/>
+<param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/>
+<param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/>
+<param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/>
 </when>
 </conditional>
-<expand macro="anndata_modify_output_input"/>
 </inputs>
 <outputs>
-<expand macro="anndata_modify_outputs"/>
+<expand macro="anndata_outputs"/>
-<!-- for pp.filter_cells -->
-<data name="counts_per_cell" format="tabular" label="${tool.name} on ${on_string}: Counts per cells after filtering">
-<filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter>
-</data>
-<data name="genes_per_cell" format="tabular" label="${tool.name} on ${on_string}: Number of genes per cell after filtering">
-<filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_genes' or method['filter']['filter'] == 'max_genes')</filter>
-</data>
-<!-- for pp.filter_genes -->
-<data name="counts_per_gene" format="tabular" label="${tool.name} on ${on_string}: Counts per genes after filtering">
-<filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter>
-</data>
-<data name="cells_per_gene" format="tabular" label="${tool.name} on ${on_string}: Number of cells per genes after filtering">
-<filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_cells' or method['filter']['filter'] == 'max_cells')</filter>
-</data>
-<!-- for pp.filter_genes_dispersion -->
-<data name="per_gene" format="tabular" label="${tool.name} on ${on_string}: Means, dispersions and normalized dispersions per gene">
-<filter>method['method'] == 'pp.filter_genes_dispersion'</filter>
-</data>
 </outputs>
 <tests>
-<test expect_num_outputs="2">
+<test>
-<conditional name="input">
+<!-- test 1 -->
-<param name="format" value="h5ad" />
+<param name="adata" value="krumsiek11.h5ad" />
-<param name="adata" value="krumsiek11.h5ad" />
-</conditional>
 <conditional name="method">
 <param name="method" value="pp.filter_cells"/>
 <conditional name="filter">
 <param name="filter" value="min_counts"/>
 <param name="min_counts" value="3"/>
 </conditional>
 </conditional>
-<conditional name="modify_anndata">
-<param name="modify_anndata" value="true"/>
-<param name="anndata_output_format" value="h5ad" />
-</conditional>
 <assert_stdout>
 <has_text_matching expression="sc.pp.filter_cells"/>
 <has_text_matching expression="min_counts=3"/>
 </assert_stdout>
-<output name="anndata_out_h5ad" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/>
+<output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/>
-<output name="counts_per_cell">
+</test>
-<assert_contents>
+<test>
-<has_text_matching expression="cell_type\tn_counts" />
+<!-- test 2 -->
-<has_text_matching expression="46\tprogenitor\t3.028" />
+<param name="adata" value="krumsiek11.h5ad" />
-<has_text_matching expression="85\tEry\t3.7001" />
-<has_text_matching expression="150\tMk\t4.095" />
-<has_n_columns n="3" />
-</assert_contents>
-</output>
-</test>
-<test expect_num_outputs="2">
-<conditional name="input">
-<param name="format" value="loom" />
-<param name="adata" value="krumsiek11.loom" />
-<param name="sparse" value="True"/>
-<param name="cleanup" value="False"/>
-<param name="x_name"  value="spliced"/>
-<param name="obs_names" value="CellID" />
-<param name="var_names" value="Gene"/>
-</conditional>
-<conditional name="method">
-<param name="method" value="pp.filter_cells"/>
-<conditional name="filter">
-<param name="filter" value="min_counts"/>
-<param name="min_counts" value="3"/>
-</conditional>
-</conditional>
-<conditional name="modify_anndata">
-<param name="modify_anndata" value="true"/>
-<param name="anndata_output_format" value="loom" />
-</conditional>
-<assert_stdout>
-<has_text_matching expression="sc.pp.filter_cells"/>
-<has_text_matching expression="min_counts=3"/>
-</assert_stdout>
-<output name="anndata_out_loom" file="pp.filter_cells.krumsiek11-min_counts.loom" ftype="loom" compare="sim_size"/>
-<output name="counts_per_cell">
-<assert_contents>
-<has_text_matching expression="cell_type\tn_counts" />
-<has_text_matching expression="46\tprogenitor\t3.028" />
-<has_text_matching expression="85\tEry\t3.7001" />
-<has_text_matching expression="97\tMo\t3.925" />
-<has_text_matching expression="150\tMk\t4.095" />
-<has_n_columns n="3" />
-</assert_contents>
-</output>
-</test>
-<test expect_num_outputs="1">
-<conditional name="input">
-<param name="format" value="h5ad" />
-<param name="adata" value="krumsiek11.h5ad"/>
-</conditional>
 <conditional name="method">
 <param name="method" value="pp.filter_cells"/>
 <conditional name="filter">
 <param name="filter" value="max_genes"/>
 <param name="max_genes" value="100"/>
 </conditional>
 </conditional>
-<conditional name="modify_anndata">
-<param name="modify_anndata" value="false"/>
-</conditional>
 <assert_stdout>
 <has_text_matching expression="sc.pp.filter_cells"/>
-<has_text_matching expression="adata.X"/>
+<has_text_matching expression="adata"/>
 <has_text_matching expression="max_genes=100"/>
 </assert_stdout>
-<output name="genes_per_cell" file="pp.filter_cells.number_per_cell.krumsiek11-max_genes.tabular"/>
+<output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/>
 </test>
-<test expect_num_outputs="2">
+<test>
-<conditional name="input">
+<!-- test 3 -->
-<param name="format" value="h5ad" />
+<param name="adata" value="krumsiek11.h5ad" />
-<param name="adata" value="krumsiek11.h5ad" />
-</conditional>
 <conditional name="method">
 <param name="method" value="pp.filter_genes"/>
 <conditional name="filter">
 <param name="filter" value="min_counts"/>
 <param name="min_counts" value="3"/>
 </conditional>
 </conditional>
-<conditional name="modify_anndata">
-<param name="modify_anndata" value="true"/>
-<param name="anndata_output_format" value="h5ad" />
-</conditional>
 <assert_stdout>
 <has_text_matching expression="sc.pp.filter_genes"/>
 <has_text_matching expression="min_counts=3"/>
 </assert_stdout>
-<output name="anndata_out_h5ad" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/>
+<output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/>
-<output name="counts_per_gene" file="pp.filter_genes.number_per_gene.krumsiek11-min_counts.tabular"/>
+</test>
-</test>
+<test>
-<test expect_num_outputs="1">
+<!-- test 4 -->
-<conditional name="input">
+<param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" />
-<param name="format" value="h5ad" />
+<conditional name="method">
-<param name="adata" value="pbmc68k_reduced.h5ad"/>
+<param name="method" value="tl.filter_rank_genes_groups"/>
-</conditional>
+<param name="key" value="rank_genes_groups"/>
-<conditional name="method">
+<param name="use_raw" value="False"/>
-<param name="method" value="pp.filter_genes"/>
+<param name="log" value="False"/>
-<conditional name="filter">
+<param name="key_added" value="rank_genes_groups_filtered"/>
-<param name="filter" value="max_cells"/>
+<param name="min_in_group_fraction" value="0.25"/>
-<param name="max_cells" value="500"/>
+<param name="max_out_group_fraction" value="0.5"/>
-</conditional>
+<param name="min_fold_change" value="3"/>
 </conditional>
-<conditional name="modify_anndata">
+<assert_stdout>
-<param name="modify_anndata" value="false"/>
+<has_text_matching expression="tl.filter_rank_genes_groups"/>
-</conditional>
+<has_text_matching expression="key='rank_genes_groups'"/>
-<assert_stdout>
+<has_text_matching expression="use_raw=False"/>
-<has_text_matching expression="sc.pp.filter_genes"/>
+<has_text_matching expression="log=False"/>
-<has_text_matching expression="adata.X"/>
+<has_text_matching expression="key_added='rank_genes_groups_filtered'"/>
-<has_text_matching expression="max_cells=500"/>
+<has_text_matching expression="min_in_group_fraction=0.25"/>
-</assert_stdout>
+<has_text_matching expression="max_out_group_fraction=0.5"/>
-<output name="cells_per_gene" file="pp.filter_genes.number_per_gene.pbmc68k_reduced-max_cells.tabular"/>
+<has_text_matching expression="min_fold_change=3"/>
-</test>
+</assert_stdout>
-<test expect_num_outputs="2">
+<output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/>
-<conditional name="input">
+</test>
-<param name="format" value="h5ad" />
+<test>
-<param name="adata" value="krumsiek11.h5ad" />
+<!-- test 5 -->
-</conditional>
+<param name="adata" value="blobs.h5ad"/>
 <conditional name="method">
-<param name="method" value="pp.filter_genes_dispersion"/>
+<param name="method" value="pp.highly_variable_genes"/>
 <conditional name="flavor">
 <param name="flavor" value="seurat"/>
 <param name="min_mean" value="0.0125"/>
 <param name="max_mean" value="3"/>
 <param name="min_disp" value="0.5"/>
 </conditional>
-<param name="n_bins" value="20" />
+<param name="n_bins" value="20"/>
-<param name="log" value="true"/>
+<param name="subset" value="false"/>
 </conditional>
-<conditional name="modify_anndata">
+<assert_stdout>
-<param name="modify_anndata" value="true"/>
+<has_text_matching expression="sc.pp.highly_variable_genes"/>
-<param name="anndata_output_format" value="h5ad" />
-</conditional>
-<assert_stdout>
-<has_text_matching expression="sc.pp.filter_genes_dispersion"/>
 <has_text_matching expression="flavor='seurat'"/>
 <has_text_matching expression="min_mean=0.0125"/>
-<has_text_matching expression="max_mean=3.0"/>
+<has_text_matching expression="max_mean=3"/>
 <has_text_matching expression="min_disp=0.5"/>
 <has_text_matching expression="n_bins=20"/>
-<has_text_matching expression="log=True"/>
+<has_text_matching expression="subset=False"/>
 </assert_stdout>
-<output name="anndata_out_h5ad" file="pp.filter_genes_dispersion.krumsiek11-seurat.h5ad" ftype="h5" compare="sim_size"/>
+<output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size"/>
-<output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-seurat.tabular"/>
+</test>
-</test>
+<test>
-<test expect_num_outputs="1">
+<!-- test 6 -->
-<conditional name="input">
+<param name="adata" value="krumsiek11.h5ad" />
-<param name="format" value="h5ad" />
+<conditional name="method">
-<param name="adata" value="krumsiek11.h5ad" />
+<param name="method" value="pp.highly_variable_genes"/>
-</conditional>
-<conditional name="method">
-<param name="method" value="pp.filter_genes_dispersion"/>
 <conditional name="flavor">
 <param name="flavor" value="cell_ranger"/>
 <param name="n_top_genes" value="2"/>
 </conditional>
 <param name="n_bins" value="20"/>
-<param name="log" value="true"/>
+</conditional>
-</conditional>
+<assert_stdout>
-<conditional name="modify_anndata">
+<has_text_matching expression="sc.pp.highly_variable_genes"/>
-<param name="modify_anndata" value="false"/>
-</conditional>
-<assert_stdout>
-<has_text_matching expression="sc.pp.filter_genes_dispersion"/>
 <has_text_matching expression="flavor='cell_ranger'"/>
 <has_text_matching expression="n_top_genes=2"/>
 <has_text_matching expression="n_bins=20"/>
-<has_text_matching expression="og=True"/>
+</assert_stdout>
-</assert_stdout>
+<output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size"/>
-<output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-cell_ranger.tabular"/>
+</test>
-</test>
+<test>
-<test expect_num_outputs="1">
+<!-- test 7 -->
-<conditional name="input">
+<param name="adata" value="krumsiek11.h5ad" />
-<param name="format" value="h5ad" />
-<param name="adata" value="krumsiek11.h5ad" />
-</conditional>
 <conditional name="method">
 <param name="method" value="pp.subsample"/>
 <conditional name="type">
 <param name="type" value="fraction" />
 <param name="fraction" value="0.5"/>
 </conditional>
 <param name="random_state" value="0"/>
 </conditional>
-<conditional name="modify_anndata">
-<param name="modify_anndata" value="true"/>
-<param name="anndata_output_format" value="h5ad" />
-</conditional>
 <assert_stdout>
 <has_text_matching expression="sc.pp.subsample"/>
 <has_text_matching expression="fraction=0.5"/>
 <has_text_matching expression="random_state=0"/>
 </assert_stdout>
-<output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5" compare="sim_size"/>
+<output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/>
 </test>
-<test expect_num_outputs="1">
+<test>
-<conditional name="input">
+<!-- test 8 -->
-<param name="format" value="h5ad" />
+<param name="adata" value="krumsiek11.h5ad" />
-<param name="adata" value="krumsiek11.h5ad" />
-</conditional>
 <conditional name="method">
 <param name="method" value="pp.subsample"/>
 <conditional name="type">
 <param name="type" value="n_obs" />
 <param name="n_obs" value="10"/>
 </conditional>
 <param name="random_state" value="0"/>
 </conditional>
-<conditional name="modify_anndata">
-<param name="modify_anndata" value="true"/>
-<param name="anndata_output_format" value="h5ad" />
-</conditional>
 <assert_stdout>
 <has_text_matching expression="sc.pp.subsample"/>
 <has_text_matching expression="n_obs=10"/>
 <has_text_matching expression="random_state=0"/>
 </assert_stdout>
-<output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5" compare="sim_size"/>
+<output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/>
+</test>
+<test>
+<!-- test 9 -->
+<param name="adata" value="random-randint.h5ad" />
+<conditional name="method">
+<param name="method" value="pp.downsample_counts"/>
+<param name="total_counts" value="20000"/>
+<param name="random_state" value="0"/>
+<param name="replace" value="false"/>
+</conditional>
+<assert_stdout>
+<has_text_matching expression="sc.pp.downsample_counts"/>
+<has_text_matching expression="total_counts=20000"/>
+<has_text_matching expression="random_state=0"/>
+<has_text_matching expression="replace=False"/>
+</assert_stdout>
+<output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size"/>
 </test>
 </tests>
 <help><![CDATA[
 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`)
 Only provide one of the optional parameters `min_counts`, `min_genes`,
 `max_counts`, `max_genes` per call.
 More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_cells.html#scanpy.api.pp.filter_cells>`__
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.filter_cells.html>`__
-Return
-------
-number_per_cell : Number per cell (either `n_counts` or `n_genes` per cell)
 Filter genes based on number of cells or counts (`pp.filter_genes`)
 ===================================================================
 Only provide one of the optional parameters `min_counts`, `min_cells`,
 `max_counts`, `max_cells` per call.
 More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes.html#scanpy.api.pp.filter_genes>`__
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.filter_genes.html>`__
-Return
-------
+Filters out genes based on fold change and fraction of cells expressing the gene within and outside the groupby categories (`tl.filter_rank_genes_groups`)
+==========================================================================================================================================================
-number_per_gene : Number per genes (either `n_counts` or `n_genes` per cell)
-Extract highly variable genes (`pp.filter_genes_dispersion`)
-============================================================
-If trying out parameters, pass the data matrix instead of AnnData.
-Depending on `flavor`, this reproduces the R-implementations of Seurat and Cell Ranger.
-The normalized dispersion is obtained by scaling with the mean and standard
-deviation of the dispersions for genes falling into a given bin for mean
-expression of genes. This means that for each bin of mean expression, highly
-variable genes are selected.
-Use `flavor='cell_ranger'` with care and in the same way as in `pp.recipe_zheng17`.
 More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes_dispersion.html#scanpy.api.pp.filter_genes_dispersion>`__
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.filter_rank_genes_groups.html>`__
-Returns
--------
+Annotate highly variable genes (`pp.highly_variable_genes`)
-- The annotated matrix filtered, with the annotations
+===========================================================
-- A table with the means, dispersions, and normalized dispersions per gene, logarithmized when `log` is `True`.
+It expects logarithmized data.
+Depending on flavor, this reproduces the R-implementations of Seurat or Cell Ranger. The normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected.
 Subsample to a fraction of the number of observations (`pp.subsample`)
 ======================================================================
 More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.subsample.html#scanpy.api.pp.subsample>`__
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.subsample.html>`__
+Downsample counts (`pp.downsample_counts`)
+==========================================
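+Downsample counts from the count matrix. If `counts_per_cell` is given, each cell with more counts than that is downsampled to `counts_per_cell`; cells with fewer counts are unaffected. If `total_counts` is given, the whole count matrix is downsampled to contain at most `total_counts`. This
+has been implemented by M. D. Luecken.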
+More details on the `scanpy documentation
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.downsample_counts.html>`__
 ]]></help>
 <expand macro="citations"/>
 </tool>

Mercurial > repos > iuc > scanpy_filter

comparison filter.xml @ 1:6a76b60e05f5 draft