Mercurial > repos > iuc > scanpy_filter
changeset 21:5b3c1679d29b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 637a6ebb9ca7f745c83146151cb1655cc902afc6
author | iuc |
---|---|
date | Thu, 09 Jan 2025 15:54:14 +0000 |
parents | 64388be6d510 |
children | |
files | filter.xml |
diffstat | 1 files changed, 181 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/filter.xml Fri Oct 18 10:35:58 2024 +0000 +++ b/filter.xml Thu Jan 09 15:54:14 2025 +0000 @@ -1,4 +1,4 @@ -<tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> +<tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy3" profile="@PROFILE@"> <description>mark and subsample</description> <macros> <import>macros.xml</import> @@ -49,6 +49,58 @@ @CMD_ANNDATA_WRITE_OUTPUTS@ +#else if $method.method == 'filter_any' + #if $method.filter.filter == 'key' + #if $method.var_obs == 'var' +filtered = adata.var['$method.filter.key'] + #else if $method.var_obs == 'obs' +filtered = adata.obs['$method.filter.key'] + #end if + + #if $method.filter.filter_key.type == 'number' + #if $method.filter.filter_key.filter == 'equal' +filtered = filtered == $method.filter.filter_key.value + #else if $method.filter.filter_key.filter == 'not_equal' +filtered = filtered != $method.filter.filter_key.value + #else if $method.filter.filter_key.filter == 'less' +filtered = filtered < $method.filter.filter_key.value + #else if $method.filter.filter_key.filter == 'less_or_equal' +filtered = filtered <= $method.filter.filter_key.value + #else if $method.filter.filter_key.filter == 'greater' +filtered = filtered > $method.filter.filter_key.value + #else if $method.filter.filter_key.filter == 'greater_or_equal' +filtered = filtered >= $method.filter.filter_key.value + #end if + #else if $method.filter.filter_key.type == 'text' + #if $method.filter.filter_key.filter == 'equal' +filtered = filtered == '$method.filter.filter_key.value' + #else +filtered = filtered != '$method.filter.filter_key.value' + #end if + #else if $method.filter.filter_key.type == 'boolean' +filtered = filtered == $method.filter.filter_key.value + #end if + + #else if $method.filter.filter == 'index' + #if str($method.filter.index.format) == 'file' +with open('$method.filter.index.file', 'r') as filter_f: + filters = [str(x.strip()) for x in 
filter_f.readlines()] +filtered = filters + #else + #set $filters = [str(x.strip()) for x in $method.filter.index.text.split(',')] +filtered = $filters + #end if + #end if +print(filtered) + + #if $method.var_obs == 'var' +adata = adata[:,filtered] + #else if $method.var_obs == 'obs' +adata = adata[filtered, :] + #end if + +@CMD_ANNDATA_WRITE_OUTPUTS@ + #else if $method.method == 'tl.filter_rank_genes_groups' sc.tl.filter_rank_genes_groups( adata, @@ -218,6 +270,7 @@ <param argument="method" type="select" label="Method used for filtering"> <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option> <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option> + <option value="filter_any">Filter on any column of observations or variables</option> <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option> <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option> <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option> @@ -234,16 +287,16 @@ <option value="max_genes">Maximum number of genes expressed</option> </param> <when value="min_counts"> - <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a cell to pass filtering"/> + <param argument="min_counts" type="integer" min="0" value="0" label="Minimum number of counts required for a cell to pass filtering"/> </when> <when value="max_counts"> - <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a cell to pass filtering"/> + <param argument="max_counts" type="integer" min="0" value="100000000" label="Maximum number 
of counts required for a cell to pass filtering"/> </when> <when value="min_genes"> - <param argument="min_genes" type="integer" min="0" value="" label="Minimum number of genes expressed required for a cell to pass filtering"/> + <param argument="min_genes" type="integer" min="0" value="0" label="Minimum number of genes expressed required for a cell to pass filtering"/> </when> <when value="max_genes"> - <param argument="max_genes" type="integer" min="0" value="" label="Maximum number of genes expressed required for a cell to pass filtering"/> + <param argument="max_genes" type="integer" min="0" value="100000000" label="Maximum number of genes expressed required for a cell to pass filtering"/> </when> </conditional> </when> @@ -256,16 +309,72 @@ <option value="max_cells">Maximum number of cells expressed</option> </param> <when value="min_counts"> - <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/> + <param argument="min_counts" type="integer" min="0" value="" optional="true" label="Minimum number of counts required for a gene to pass filtering"/> </when> <when value="max_counts"> - <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/> + <param argument="max_counts" type="integer" min="0" value="" optional="true" label="Maximum number of counts required for a gene to pass filtering"/> </when> <when value="min_cells"> - <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/> + <param argument="min_cells" type="integer" min="0" value="" optional="true" label="Minimum number of cells expressed required for a gene to pass filtering"/> </when> <when value="max_cells"> - <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/> + <param 
argument="max_cells" type="integer" min="0" value="" optional="true" label="Maximum number of cells expressed required for a gene to pass filtering"/> + </when> + </conditional> + </when> + <when value="filter_any"> + <param name="var_obs" type="select" label="What to filter?"> + <option value="var">Variables (var)</option> + <option value="obs">Observations (obs)</option> + </param> + <conditional name="filter"> + <param name="filter" type="select" label="Type of filtering?"> + <option value="key">By key (column) values</option> + <option value="index">By index (row)</option> + </param> + <when value="key"> + <param name="key" type="text" value="n_genes" label="Key to filter"/> + <conditional name="filter_key"> + <param name="type" type="select" label="Type of value to filter"> + <option value="number">Number</option> + <option value="text">Text</option> + <option value="boolean">Boolean</option> + </param> + <when value="number"> + <param name="filter" type="select" label="Filter"> + <option value="equal">equal to</option> + <option value="not_equal">not equal to</option> + <option value="less">less than</option> + <option value="less_or_equal">less than or equal to</option> + <option value="greater">greater than</option> + <option value="greater_or_equal">greater than or equal to</option> + </param> + <param name="value" type="float" value="2500" label="Value"/> + </when> + <when value="text"> + <param name="filter" type="select" label="Filter"> + <option value="equal">equal to</option> + <option value="not_equal">not equal to</option></param> + <param name="value" type="text" value="2500" label="Value"/> + </when> + <when value="boolean"> + <param name="value" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Value to keep"/> + </when> + </conditional> + </when> + <when value="index"> + <conditional name="index"> + <param name="format" type="select" label="Format for the filter by index"> + <option value="file">File</option> + <option 
value="text" selected="true">Text</option> + </param> + <when value="text"> + <param name="text" type="text" value="" label="List of index to keep" help="Indexes separated by a comma"/> + </when> + <when value="file"> + <param name="file" type="data" format="txt" label="File with the list of index to keep" help="One index per line"/> + </when> + </conditional> </when> </conditional> </when> @@ -478,7 +587,60 @@ </output> </test> - <!-- test 4 --> + <test expect_num_outputs="1"> + <!-- test 4: filter_any on variables (var), filtering by index given as comma-separated text ("Gata2,EKLF"); stdout is expected to report 640 × 2 --> + <param name="adata" value="krumsiek11.h5ad"/> + <conditional name="method"> + <param name="method" value="filter_any"/> + <param name="var_obs" value="var"/> + <conditional name="filter"> + <param name="filter" value="index"/> + <conditional name="index"> + <param name="format" value="text"/> + <param name="text" value="Gata2,EKLF"/> + </conditional> + </conditional> + </conditional> + <assert_stdout> + <has_text_matching expression="640 × 2"/> + </assert_stdout> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/cell_type"/> + <has_h5_keys keys="uns/highlights"/> + <has_h5_keys keys="uns/iroot"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <!-- test 5: filter_any on observations (obs), filtering by key "cell_type" with a text "equal" comparison against "progenitor"; stdout is expected to report 320 × 11 --> + <param name="adata" value="krumsiek11.h5ad"/> + <conditional name="method"> + <param name="method" value="filter_any"/> + <param name="var_obs" value="obs"/> + <conditional name="filter"> + <param name="filter" value="key"/> + <param name="key" value="cell_type"/> + <conditional name="filter_key"> + <param name="type" value="text"/> + <param name="filter" value="equal"/> + <param name="value" value="progenitor"/> + </conditional> + </conditional> + </conditional> + 
<assert_stdout> + <has_text_matching expression="320 × 11"/> + </assert_stdout> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/cell_type"/> + <has_h5_keys keys="uns/highlights"/> + <has_h5_keys keys="uns/iroot"/> + </assert_contents> + </output> + 
</test> + + <!-- test 6 --> <!-- Fails to write to anndata after tl.filter_rank_genes_groups Issue has been reported here: https://github.com/scverse/anndata/issues/726 The current fix is: del adata.uns['rank_genes_groups_filtered'] --> @@ -511,7 +673,7 @@ </output> </test> - <!-- test 5 --> + <!-- test 7 --> <test expect_num_outputs="2"> <param name="adata" value="blobs.h5ad"/> <conditional name="method"> @@ -539,7 +701,7 @@ </output> </test> - <!-- test 6 --> + <!-- test 8 --> <test expect_num_outputs="2"> <param name="adata" value="krumsiek11.h5ad"/> <conditional name="method"> @@ -570,7 +732,7 @@ </output> </test> - <!-- test 7 --> + <!-- test 9 --> <test expect_num_outputs="2"> <param name="adata" value="krumsiek11.h5ad"/> <conditional name="method"> @@ -600,8 +762,8 @@ </output> </test> - <!-- test 8 --> - <test expect_num_outputs="2"> + <!-- test 10 --> + <test expect_num_outputs="2"> <param name="adata" value="krumsiek11.h5ad"/> <conditional name="method"> <param name="method" value="pp.subsample"/> @@ -630,7 +792,7 @@ </output> </test> - <!-- test 9 --> + <!-- test 11 --> <test expect_num_outputs="2"> <param name="adata" value="random-randint.h5ad"/> <conditional name="method"> @@ -657,7 +819,7 @@ </output> </test> - <!-- test 10 --> + <!-- test 12 --> <test expect_num_outputs="2"> <param name="adata" value="random-randint.h5ad"/> <conditional name="method"> @@ -686,7 +848,7 @@ </output> </test> - <!-- test 10 --> + <!-- test 13 --> <test expect_num_outputs="2"> <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad"/> <conditional name="method"> @@ -716,7 +878,7 @@ </output> </test> - <!-- test 11 --> + <!-- test 14 --> <test expect_num_outputs="2"> <param name="adata" value="krumsiek11.h5ad"/> <conditional name="method">