Mercurial > repos > iuc > scanpy_filter
diff filter.xml @ 1:6a76b60e05f5 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author | iuc |
---|---|
date | Wed, 16 Oct 2019 06:32:33 -0400 |
parents | 6ea5a05a260a |
children | e62673c32a5d |
line wrap: on
line diff
--- a/filter.xml Mon Mar 04 10:15:02 2019 -0500 +++ b/filter.xml Wed Oct 16 06:32:33 2019 -0400 @@ -1,5 +1,5 @@ -<tool id="scanpy_filter" name="Filter with scanpy" version="@galaxy_version@"> - <description></description> +<tool id="scanpy_filter" name="Filter" version="@galaxy_version@"> + <description>with scanpy</description> <macros> <import>macros.xml</import> </macros> @@ -14,94 +14,74 @@ @CMD_read_inputs@ #if $method.method == 'pp.filter_cells' -res = sc.pp.filter_cells( - #if $modify_anndata.modify_anndata == 'true' +sc.pp.filter_cells( adata, - #else - adata.X, - #end if #if $method.filter.filter == 'min_counts' min_counts=$method.filter.min_counts, - #elif $method.filter.filter == 'max_counts' + #else if $method.filter.filter == 'max_counts' max_counts=$method.filter.max_counts, - #elif $method.filter.filter == 'min_genes' + #else if $method.filter.filter == 'min_genes' min_genes=$method.filter.min_genes, - #elif $method.filter.filter == 'max_genes' + #else if $method.filter.filter == 'max_genes' max_genes=$method.filter.max_genes, #end if copy=False) - #if $modify_anndata.modify_anndata == 'true' -df = adata.obs - #else -df = pd.DataFrame(data=dict(cell_subset=res[0], number_per_cell=res[1])) - #end if - - #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts' -df.to_csv('$counts_per_cell', sep='\t') - #elif $method.filter.filter == 'min_genes' or $method.filter.filter == 'max_genes' -df.to_csv('$genes_per_cell', sep='\t') - #end if - -#elif $method.method == 'pp.filter_genes' -res = sc.pp.filter_genes( - #if $modify_anndata.modify_anndata == 'true' +#else if $method.method == 'pp.filter_genes' +sc.pp.filter_genes( adata, - #else - adata.X, - #end if #if $method.filter.filter == 'min_counts' min_counts=$method.filter.min_counts, - #elif $method.filter.filter == 'max_counts' + #else if $method.filter.filter == 'max_counts' max_counts=$method.filter.max_counts, - #elif $method.filter.filter == 'min_cells' + #else if 
$method.filter.filter == 'min_cells' min_cells=$method.filter.min_cells, - #elif $method.filter.filter == 'max_cells' + #else if $method.filter.filter == 'max_cells' max_cells=$method.filter.max_cells, #end if copy=False) - #if $modify_anndata.modify_anndata == 'true' -df = adata.var - #else -df = pd.DataFrame(data=dict(gene_subset=res[0], number_per_gene=res[1])) +#else if $method.method == 'tl.filter_rank_genes_groups' +sc.tl.filter_rank_genes_groups( + adata, + #if str($method.key) != '' + key='$method.key', #end if - - #if $method.filter.filter == 'min_counts' or $method.filter.filter == 'max_counts' -df.to_csv('$counts_per_gene', sep='\t') - #elif $method.filter.filter == 'min_cells' or $method.filter.filter == 'max_cells' -df.to_csv('$cells_per_gene', sep='\t') + #if str($method.groupby) != '' + groupby='$method.groupby', #end if + use_raw=$method.use_raw, + log=$method.log, + key_added='$method.key_added', + min_in_group_fraction=$method.min_in_group_fraction, + max_out_group_fraction=$method.max_out_group_fraction, + min_fold_change=$method.min_fold_change) -#elif $method.method == 'pp.filter_genes_dispersion' -res = sc.pp.filter_genes_dispersion( - #if $modify_anndata.modify_anndata == 'true' - adata, - #else - adata.X, - #end if +#else if $method.method == "pp.highly_variable_genes" +sc.pp.highly_variable_genes( + adata=adata, flavor='$method.flavor.flavor', - #if $method.flavor.flavor=='seurat' + #if $method.flavor.flavor == 'seurat' + #if str($method.flavor.min_mean) != '' min_mean=$method.flavor.min_mean, + #end if + #if str($method.flavor.max_mean) != '' max_mean=$method.flavor.max_mean, + #end if + #if str($method.flavor.min_disp) != '' min_disp=$method.flavor.min_disp, - #if $method.flavor.max_disp + #end if + #if str($method.flavor.max_disp) != '' max_disp=$method.flavor.max_disp, #end if - #else + #else if $method.flavor.flavor == 'cell_ranger' n_top_genes=$method.flavor.n_top_genes, #end if n_bins=$method.n_bins, - log=$method.log, - copy=False) 
+ + subset=$method.subset, + inplace=True) - #if $modify_anndata.modify_anndata == 'true' -adata.var.to_csv('$per_gene', sep='\t') - #else -pd.DataFrame(res).to_csv('$per_gene', sep='\t') - #end if - -#elif $method.method == 'pp.subsample' +#else if $method.method == 'pp.subsample' sc.pp.subsample( data=adata, #if $method.type.type == 'fraction' @@ -112,9 +92,21 @@ random_state=$method.random_state, copy=False) +#else if $method.method == "pp.downsample_counts" +sc.pp.downsample_counts( + adata=adata, + #if str($method.counts_per_cell) != '' + counts_per_cell=$method.counts_per_cell, + #end if + #if str($method.total_counts) != '' + total_counts=$method.total_counts, + #end if + random_state=$method.random_state, + replace=$method.replace, + copy=False) #end if -@CMD_anndata_write_modify_outputs@ +@CMD_anndata_write_outputs@ ]]></configfile> </configfiles> <inputs> @@ -123,11 +115,10 @@ <param argument="method" type="select" label="Method used for filtering"> <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`</option> <option value="pp.filter_genes">Filter genes based on number of cells or counts, using `pp.filter_genes`</option> - <option value="pp.filter_genes_dispersion">Extract highly variable genes, using `pp.filter_genes_dispersion`</option> - <!--<option value="pp.highly_variable_genes">, using `tl.highly_variable_genes`</option>!--> + <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using `tl.filter_rank_genes_groups`</option> + <option value="pp.highly_variable_genes">Annotate highly variable genes, using `pp.highly_variable_genes`</option> <option value="pp.subsample">Subsample to a fraction of the number of observations, using `pp.subsample`</option> - <!--<option value="queries.gene_coordinates">, using `queries.gene_coordinates`</option>!--> - <!--<option value="queries.mitochondrial_genes">, 
using `queries.mitochondrial_genes`</option>!--> + <option value="pp.downsample_counts">Downsample counts from count matrix, using `pp.downsample_counts`</option> </param> <when value="pp.filter_cells"> <conditional name="filter"> @@ -160,37 +151,47 @@ <option value="max_cells">Maximum number of cells expressed</option> </param> <when value="min_counts"> - <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering" help=""/> + <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/> </when> <when value="max_counts"> - <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering" help=""/> + <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/> </when> <when value="min_cells"> - <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering" help=""/> + <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/> </when> <when value="max_cells"> - <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering" help=""/> + <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/> </when> </conditional> </when> - <when value="pp.filter_genes_dispersion"> + <when value="tl.filter_rank_genes_groups"> + <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored"/> + <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider"/> + <expand 
macro="param_use_raw"/> + <expand macro="param_log"/> + <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values"/> + <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of genes expressing the gene within the categories"/> + <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of genes expressing the gene outside the categories"/> + <param argument="min_fold_change" type="integer" value="2" label="Minimum fold change"/> + </when> + <when value="pp.highly_variable_genes"> <conditional name='flavor'> - <param argument="flavor" type="select" label="Flavor for computing normalized dispersion" help=""> + <param argument="flavor" type="select" label="Flavor for computing normalized dispersion"> <option value="seurat">seurat: expects non-logarithmized data</option> <option value="cell_ranger">cell_ranger: usually called for logarithmized data</option> </param> <when value="seurat"> - <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff" help=""/> - <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff" help=""/> - <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff" help=""/> - <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff" help=""/> + <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff"/> + <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff"/> + <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff"/> + <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff"/> </when> <when value="cell_ranger"> - <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep" 
help=""/> + <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep"/> </when> </conditional> <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/> - <expand macro="param_log"/> + <param argument="subset" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Inplace subset to highly-variable genes?" help="Otherwise it merely indicates highly variable genes."/> </when> <when value="pp.subsample"> <conditional name="type"> @@ -199,44 +200,29 @@ <option value="n_obs">By number of observation</option> </param> <when value="fraction"> - <param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations" help=""/> + <param argument="fraction" type="float" value="" label="Subsample to this `fraction` of the number of observations"/> </when> <when value="n_obs"> - <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations" help=""/> + <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations"/> </when> </conditional> - <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/> + <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> + </when> + <when value="pp.downsample_counts"> + <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than ‘counts_per_cell’, it will be downsampled to this number. 
Resulting counts can be specified on a per cell basis by passing an array."/> + <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/> + <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> + <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/> </when> </conditional> - <expand macro="anndata_modify_output_input"/> </inputs> <outputs> - <expand macro="anndata_modify_outputs"/> - <!-- for pp.filter_cells --> - <data name="counts_per_cell" format="tabular" label="${tool.name} on ${on_string}: Counts per cells after filtering"> - <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter> - </data> - <data name="genes_per_cell" format="tabular" label="${tool.name} on ${on_string}: Number of genes per cell after filtering"> - <filter>method['method'] == 'pp.filter_cells' and (method['filter']['filter'] == 'min_genes' or method['filter']['filter'] == 'max_genes')</filter> - </data> - <!-- for pp.filter_genes --> - <data name="counts_per_gene" format="tabular" label="${tool.name} on ${on_string}: Counts per genes after filtering"> - <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_counts' or method['filter']['filter'] == 'max_counts')</filter> - </data> - <data name="cells_per_gene" format="tabular" label="${tool.name} on ${on_string}: Number of cells per genes after filtering"> - <filter>method['method'] == 'pp.filter_genes' and (method['filter']['filter'] == 'min_cells' or method['filter']['filter'] == 'max_cells')</filter> - </data> - <!-- for pp.filter_genes_dispersion --> - <data name="per_gene" format="tabular" label="${tool.name} on ${on_string}: Means, dispersions and 
normalized dispersions per gene"> - <filter>method['method'] == 'pp.filter_genes_dispersion'</filter> - </data> + <expand macro="anndata_outputs"/> </outputs> <tests> - <test expect_num_outputs="2"> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" /> - </conditional> + <test> + <!-- test 1 --> + <param name="adata" value="krumsiek11.h5ad" /> <conditional name="method"> <param name="method" value="pp.filter_cells"/> <conditional name="filter"> @@ -244,67 +230,15 @@ <param name="min_counts" value="3"/> </conditional> </conditional> - <conditional name="modify_anndata"> - <param name="modify_anndata" value="true"/> - <param name="anndata_output_format" value="h5ad" /> - </conditional> <assert_stdout> <has_text_matching expression="sc.pp.filter_cells"/> <has_text_matching expression="min_counts=3"/> </assert_stdout> - <output name="anndata_out_h5ad" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> - <output name="counts_per_cell"> - <assert_contents> - <has_text_matching expression="cell_type\tn_counts" /> - <has_text_matching expression="46\tprogenitor\t3.028" /> - <has_text_matching expression="85\tEry\t3.7001" /> - <has_text_matching expression="150\tMk\t4.095" /> - <has_n_columns n="3" /> - </assert_contents> - </output> + <output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test expect_num_outputs="2"> - <conditional name="input"> - <param name="format" value="loom" /> - <param name="adata" value="krumsiek11.loom" /> - <param name="sparse" value="True"/> - <param name="cleanup" value="False"/> - <param name="x_name" value="spliced"/> - <param name="obs_names" value="CellID" /> - <param name="var_names" value="Gene"/> - </conditional> - <conditional name="method"> - <param name="method" value="pp.filter_cells"/> - <conditional name="filter"> - <param name="filter" value="min_counts"/> - <param 
name="min_counts" value="3"/> - </conditional> - </conditional> - <conditional name="modify_anndata"> - <param name="modify_anndata" value="true"/> - <param name="anndata_output_format" value="loom" /> - </conditional> - <assert_stdout> - <has_text_matching expression="sc.pp.filter_cells"/> - <has_text_matching expression="min_counts=3"/> - </assert_stdout> - <output name="anndata_out_loom" file="pp.filter_cells.krumsiek11-min_counts.loom" ftype="loom" compare="sim_size"/> - <output name="counts_per_cell"> - <assert_contents> - <has_text_matching expression="cell_type\tn_counts" /> - <has_text_matching expression="46\tprogenitor\t3.028" /> - <has_text_matching expression="85\tEry\t3.7001" /> - <has_text_matching expression="97\tMo\t3.925" /> - <has_text_matching expression="150\tMk\t4.095" /> - <has_n_columns n="3" /> - </assert_contents> - </output> - </test> - <test expect_num_outputs="1"> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad"/> - </conditional> + <test> + <!-- test 2 --> + <param name="adata" value="krumsiek11.h5ad" /> <conditional name="method"> <param name="method" value="pp.filter_cells"/> <conditional name="filter"> @@ -312,21 +246,16 @@ <param name="max_genes" value="100"/> </conditional> </conditional> - <conditional name="modify_anndata"> - <param name="modify_anndata" value="false"/> - </conditional> <assert_stdout> <has_text_matching expression="sc.pp.filter_cells"/> - <has_text_matching expression="adata.X"/> + <has_text_matching expression="adata"/> <has_text_matching expression="max_genes=100"/> </assert_stdout> - <output name="genes_per_cell" file="pp.filter_cells.number_per_cell.krumsiek11-max_genes.tabular"/> + <output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test expect_num_outputs="2"> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" /> 
- </conditional> + <test> + <!-- test 3 --> + <param name="adata" value="krumsiek11.h5ad" /> <conditional name="method"> <param name="method" value="pp.filter_genes"/> <conditional name="filter"> @@ -334,102 +263,84 @@ <param name="min_counts" value="3"/> </conditional> </conditional> - <conditional name="modify_anndata"> - <param name="modify_anndata" value="true"/> - <param name="anndata_output_format" value="h5ad" /> - </conditional> <assert_stdout> <has_text_matching expression="sc.pp.filter_genes"/> <has_text_matching expression="min_counts=3"/> </assert_stdout> - <output name="anndata_out_h5ad" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5" compare="sim_size"/> - <output name="counts_per_gene" file="pp.filter_genes.number_per_gene.krumsiek11-min_counts.tabular"/> + <output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test expect_num_outputs="1"> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="pbmc68k_reduced.h5ad"/> - </conditional> + <test> + <!-- test 4 --> + <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" /> <conditional name="method"> - <param name="method" value="pp.filter_genes"/> - <conditional name="filter"> - <param name="filter" value="max_cells"/> - <param name="max_cells" value="500"/> - </conditional> - </conditional> - <conditional name="modify_anndata"> - <param name="modify_anndata" value="false"/> + <param name="method" value="tl.filter_rank_genes_groups"/> + <param name="key" value="rank_genes_groups"/> + <param name="use_raw" value="False"/> + <param name="log" value="False"/> + <param name="key_added" value="rank_genes_groups_filtered"/> + <param name="min_in_group_fraction" value="0.25"/> + <param name="max_out_group_fraction" value="0.5"/> + <param name="min_fold_change" value="3"/> </conditional> <assert_stdout> - <has_text_matching expression="sc.pp.filter_genes"/> - <has_text_matching 
expression="adata.X"/> - <has_text_matching expression="max_cells=500"/> + <has_text_matching expression="tl.filter_rank_genes_groups"/> + <has_text_matching expression="key='rank_genes_groups'"/> + <has_text_matching expression="use_raw=False"/> + <has_text_matching expression="log=False"/> + <has_text_matching expression="key_added='rank_genes_groups_filtered'"/> + <has_text_matching expression="min_in_group_fraction=0.25"/> + <has_text_matching expression="max_out_group_fraction=0.5"/> + <has_text_matching expression="min_fold_change=3"/> </assert_stdout> - <output name="cells_per_gene" file="pp.filter_genes.number_per_gene.pbmc68k_reduced-max_cells.tabular"/> + <output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test expect_num_outputs="2"> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" /> - </conditional> + <test> + <!-- test 5 --> + <param name="adata" value="blobs.h5ad"/> <conditional name="method"> - <param name="method" value="pp.filter_genes_dispersion"/> + <param name="method" value="pp.highly_variable_genes"/> <conditional name="flavor"> <param name="flavor" value="seurat"/> <param name="min_mean" value="0.0125"/> <param name="max_mean" value="3"/> <param name="min_disp" value="0.5"/> </conditional> - <param name="n_bins" value="20" /> - <param name="log" value="true"/> - </conditional> - <conditional name="modify_anndata"> - <param name="modify_anndata" value="true"/> - <param name="anndata_output_format" value="h5ad" /> + <param name="n_bins" value="20"/> + <param name="subset" value="false"/> </conditional> <assert_stdout> - <has_text_matching expression="sc.pp.filter_genes_dispersion"/> + <has_text_matching expression="sc.pp.highly_variable_genes"/> <has_text_matching expression="flavor='seurat'"/> <has_text_matching expression="min_mean=0.0125"/> - <has_text_matching expression="max_mean=3.0"/> + <has_text_matching 
expression="max_mean=3"/> <has_text_matching expression="min_disp=0.5"/> <has_text_matching expression="n_bins=20"/> - <has_text_matching expression="log=True"/> + <has_text_matching expression="subset=False"/> </assert_stdout> - <output name="anndata_out_h5ad" file="pp.filter_genes_dispersion.krumsiek11-seurat.h5ad" ftype="h5" compare="sim_size"/> - <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-seurat.tabular"/> + <output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test expect_num_outputs="1"> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" /> - </conditional> + <test> + <!-- test 6 --> + <param name="adata" value="krumsiek11.h5ad" /> <conditional name="method"> - <param name="method" value="pp.filter_genes_dispersion"/> + <param name="method" value="pp.highly_variable_genes"/> <conditional name="flavor"> <param name="flavor" value="cell_ranger"/> <param name="n_top_genes" value="2"/> </conditional> <param name="n_bins" value="20"/> - <param name="log" value="true"/> - </conditional> - <conditional name="modify_anndata"> - <param name="modify_anndata" value="false"/> </conditional> <assert_stdout> - <has_text_matching expression="sc.pp.filter_genes_dispersion"/> + <has_text_matching expression="sc.pp.highly_variable_genes"/> <has_text_matching expression="flavor='cell_ranger'"/> <has_text_matching expression="n_top_genes=2"/> <has_text_matching expression="n_bins=20"/> - <has_text_matching expression="og=True"/> </assert_stdout> - <output name="per_gene" file="pp.filter_genes_dispersion.per_gene.krumsiek11-cell_ranger.tabular"/> + <output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test expect_num_outputs="1"> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" 
/> - </conditional> + <test> + <!-- test 7 --> + <param name="adata" value="krumsiek11.h5ad" /> <conditional name="method"> <param name="method" value="pp.subsample"/> <conditional name="type"> @@ -438,22 +349,16 @@ </conditional> <param name="random_state" value="0"/> </conditional> - <conditional name="modify_anndata"> - <param name="modify_anndata" value="true"/> - <param name="anndata_output_format" value="h5ad" /> - </conditional> <assert_stdout> <has_text_matching expression="sc.pp.subsample"/> <has_text_matching expression="fraction=0.5"/> <has_text_matching expression="random_state=0"/> </assert_stdout> - <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5" compare="sim_size"/> + <output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test expect_num_outputs="1"> - <conditional name="input"> - <param name="format" value="h5ad" /> - <param name="adata" value="krumsiek11.h5ad" /> - </conditional> + <test> + <!-- test 8 --> + <param name="adata" value="krumsiek11.h5ad" /> <conditional name="method"> <param name="method" value="pp.subsample"/> <conditional name="type"> @@ -462,16 +367,29 @@ </conditional> <param name="random_state" value="0"/> </conditional> - <conditional name="modify_anndata"> - <param name="modify_anndata" value="true"/> - <param name="anndata_output_format" value="h5ad" /> - </conditional> <assert_stdout> <has_text_matching expression="sc.pp.subsample"/> <has_text_matching expression="n_obs=10"/> <has_text_matching expression="random_state=0"/> </assert_stdout> - <output name="anndata_out_h5ad" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5" compare="sim_size"/> + <output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/> + </test> + <test> + <!-- test 9 --> + <param name="adata" value="random-randint.h5ad" /> + <conditional name="method"> + <param name="method" value="pp.downsample_counts"/> + 
<param name="total_counts" value="20000"/> + <param name="random_state" value="0"/> + <param name="replace" value="false"/> + </conditional> + <assert_stdout> + <has_text_matching expression="sc.pp.downsample_counts"/> + <has_text_matching expression="total_counts=20000"/> + <has_text_matching expression="random_state=0"/> + <has_text_matching expression="replace=False"/> + </assert_stdout> + <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size"/> </test> </tests> <help><![CDATA[ @@ -487,12 +405,7 @@ `max_counts`, `max_genes` per call. More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_cells.html#scanpy.api.pp.filter_cells>`__ - -Return ------- - -number_per_cell : Number per cell (either `n_counts` or `n_genes` per cell) +<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.filter_cells.html>`__ Filter genes based on number of cells or counts (`pp.filter_genes`) @@ -506,42 +419,38 @@ `max_counts`, `max_cells` per call. More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes.html#scanpy.api.pp.filter_genes>`__ - -Return ------- - -number_per_gene : Number per genes (either `n_counts` or `n_genes` per cell) +<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.filter_genes.html>`__ -Extract highly variable genes (`pp.filter_genes_dispersion`) -============================================================ - -If trying out parameters, pass the data matrix instead of AnnData. - -Depending on `flavor`, this reproduces the R-implementations of Seurat and Cell Ranger. - -The normalized dispersion is obtained by scaling with the mean and standard -deviation of the dispersions for genes falling into a given bin for mean -expression of genes. This means that for each bin of mean expression, highly -variable genes are selected. 
- - Use `flavor='cell_ranger'` with care and in the same way as in `pp.recipe_zheng17`. +Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories (`tl.filter_rank_genes_groups`) +========================================================================================================================================================== More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.filter_genes_dispersion.html#scanpy.api.pp.filter_genes_dispersion>`__ +<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.filter_rank_genes_groups.html>`__ + -Returns -------- -- The annotated matrix filtered, with the annotations -- A table with the means, dispersions, and normalized dispersions per gene, logarithmized when `log` is `True`. +Annotate highly variable genes (`pp.highly_variable_genes`) +=========================================================== + +It expects logarithmized data. + +Depending on flavor, this reproduces the R-implementations of Seurat or Cell Ranger. The normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected. Subsample to a fraction of the number of observations (`pp.subsample`) ====================================================================== More details on the `scanpy documentation -<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.subsample.html#scanpy.api.pp.subsample>`__ +<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.subsample.html>`__ + +Downsample counts (`pp.downsample_counts`) +========================================== + +Downsample counts so that each cell has no more than `counts_per_cell`. Cells with fewer counts than `counts_per_cell` are unaffected by this. This +has been implemented by M.
D. Luecken. + +More details on the `scanpy documentation +<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.downsample_counts.html>`__ ]]></help>