changeset 21:5b3c1679d29b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 637a6ebb9ca7f745c83146151cb1655cc902afc6
author iuc
date Thu, 09 Jan 2025 15:54:14 +0000
parents 64388be6d510
children
files filter.xml
diffstat 1 files changed, 181 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/filter.xml	Fri Oct 18 10:35:58 2024 +0000
+++ b/filter.xml	Thu Jan 09 15:54:14 2025 +0000
@@ -1,4 +1,4 @@
-<tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+<tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy3" profile="@PROFILE@">
     <description>mark and subsample</description>
     <macros>
         <import>macros.xml</import>
@@ -49,6 +49,58 @@
 
 @CMD_ANNDATA_WRITE_OUTPUTS@
 
+#else if $method.method == 'filter_any'
+    #if $method.filter.filter == 'key'
+        #if $method.var_obs == 'var'
+filtered = adata.var['$method.filter.key']
+        #else if $method.var_obs == 'obs'
+filtered = adata.obs['$method.filter.key']
+        #end if
+
+        #if $method.filter.filter_key.type == 'number'
+            #if $method.filter.filter_key.filter == 'equal'
+filtered = filtered == $method.filter.filter_key.value
+            #else if $method.filter.filter_key.filter == 'not_equal'
+filtered = filtered != $method.filter.filter_key.value
+            #else if $method.filter.filter_key.filter == 'less'
+filtered = filtered < $method.filter.filter_key.value
+            #else if $method.filter.filter_key.filter == 'less_or_equal'
+filtered = filtered <= $method.filter.filter_key.value
+            #else if $method.filter.filter_key.filter == 'greater'
+filtered = filtered > $method.filter.filter_key.value
+            #else if $method.filter.filter_key.filter == 'greater_or_equal'
+filtered = filtered >= $method.filter.filter_key.value
+            #end if
+        #else if $method.filter.filter_key.type == 'text'
+            #if $method.filter.filter_key.filter == 'equal'
+filtered = filtered == '$method.filter.filter_key.value'
+            #else
+filtered = filtered != '$method.filter.filter_key.value'
+            #end if
+        #else if $method.filter.filter_key.type == 'boolean'
+filtered = filtered == $method.filter.filter_key.value
+        #end if
+
+    #else if $method.filter.filter == 'index'
+        #if str($method.filter.index.format) == 'file'
+with open('$method.filter.index.file', 'r') as filter_f:
+    filters = [str(x.strip()) for x in filter_f.readlines()]
+filtered = filters
+        #else
+            #set $filters = [str(x.strip()) for x in $method.filter.index.text.split(',')]
+filtered = $filters
+        #end if
+    #end if
+print(filtered)
+
+    #if $method.var_obs == 'var'
+adata = adata[:,filtered]
+    #else if $method.var_obs == 'obs'
+adata = adata[filtered, :]
+    #end if
+
+@CMD_ANNDATA_WRITE_OUTPUTS@
+
 #else if $method.method == 'tl.filter_rank_genes_groups'
 sc.tl.filter_rank_genes_groups(
     adata,
@@ -218,6 +270,7 @@
             <param argument="method" type="select" label="Method used for filtering">
                 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option>
                 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option>
+                <option value="filter_any">Filter on any column of observations or variables</option>
                 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option>
                 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option>
                 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option>
@@ -234,16 +287,16 @@
                         <option value="max_genes">Maximum number of genes expressed</option>
                     </param>
                     <when value="min_counts">
-                        <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a cell to pass filtering"/>
+                        <param argument="min_counts" type="integer" min="0" value="0" label="Minimum number of counts required for a cell to pass filtering"/>
                     </when>
                     <when value="max_counts">
-                        <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a cell to pass filtering"/>
+                        <param argument="max_counts" type="integer" min="0" value="100000000" label="Maximum number of counts required for a cell to pass filtering"/>
                     </when>
                     <when value="min_genes">
-                        <param argument="min_genes" type="integer" min="0" value="" label="Minimum number of genes expressed required for a cell to pass filtering"/>
+                        <param argument="min_genes" type="integer" min="0" value="0" label="Minimum number of genes expressed required for a cell to pass filtering"/>
                     </when>
                     <when value="max_genes">
-                        <param argument="max_genes" type="integer" min="0" value="" label="Maximum number of genes expressed required for a cell to pass filtering"/>
+                        <param argument="max_genes" type="integer" min="0" value="100000000" label="Maximum number of genes expressed required for a cell to pass filtering"/>
                     </when>
                 </conditional>
             </when>
@@ -256,16 +309,72 @@
                         <option value="max_cells">Maximum number of cells expressed</option>
                     </param>
                     <when value="min_counts">
-                        <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/>
+                        <param argument="min_counts" type="integer" min="0" value="" optional="true" label="Minimum number of counts required for a gene to pass filtering"/>
                     </when>
                     <when value="max_counts">
-                        <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/>
+                        <param argument="max_counts" type="integer" min="0" value="" optional="true" label="Maximum number of counts required for a gene to pass filtering"/>
                     </when>
                     <when value="min_cells">
-                        <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/>
+                        <param argument="min_cells" type="integer" min="0" value="" optional="true" label="Minimum number of cells expressed required for a gene to pass filtering"/>
                     </when>
                     <when value="max_cells">
-                        <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/>
+                        <param argument="max_cells" type="integer" min="0" value="" optional="true" label="Maximum number of cells expressed required for a gene to pass filtering"/>
+                    </when>
+                </conditional>
+            </when>
+            <when value="filter_any">
+                <param name="var_obs" type="select" label="What to filter?">
+                    <option value="var">Variables (var)</option>
+                    <option value="obs">Observations (obs)</option>
+                </param>
+                <conditional name="filter">
+                    <param name="filter" type="select" label="Type of filtering?">
+                        <option value="key">By key (column) values</option>
+                        <option value="index">By index (row)</option>
+                    </param>
+                    <when value="key">
+                        <param name="key" type="text" value="n_genes" label="Key to filter"/>
+                        <conditional name="filter_key">
+                            <param name="type" type="select" label="Type of value to filter">
+                                <option value="number">Number</option>
+                                <option value="text">Text</option>
+                                <option value="boolean">Boolean</option>
+                            </param>
+                            <when value="number">
+                                <param name="filter" type="select" label="Filter">
+                                    <option value="equal">equal to</option>
+                                    <option value="not_equal">not equal to</option>
+                                    <option value="less">less than</option>
+                                    <option value="less_or_equal">less than or equal to</option>
+                                    <option value="greater">greater than</option>
+                                    <option value="greater_or_equal">greater than or equal to</option>
+                                </param>
+                                <param name="value" type="float" value="2500" label="Value"/>
+                            </when>
+                            <when value="text">
+                                <param name="filter" type="select" label="Filter">
+                                    <option value="equal">equal to</option>
+                                    <option value="not_equal">not equal to</option></param>
+                                <param name="value" type="text" value="2500" label="Value"/>
+                            </when>
+                            <when value="boolean">
+                                <param name="value" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Value to keep"/>
+                            </when>
+                        </conditional>
+                    </when>
+                    <when value="index">
+                        <conditional name="index">
+                            <param name="format" type="select" label="Format for the filter by index">
+                                <option value="file">File</option>
+                                <option value="text" selected="true">Text</option>
+                            </param>
+                            <when value="text">
+                                <param name="text" type="text" value="" label="List of index to keep" help="Indexes separated by a comma"/>
+                            </when>
+                            <when value="file">
+                                <param name="file" type="data" format="txt" label="File with the list of index to keep" help="One index per line"/>
+                            </when>
+                        </conditional>
                     </when>
                 </conditional>
             </when>
@@ -478,7 +587,60 @@
             </output>
         </test>
 
-        <!--  test 4 -->
+        <test expect_num_outputs="1">
+            <!-- test 4 -->
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="filter_any"/>
+                <param name="var_obs" value="var"/>
+                <conditional name="filter">
+                    <param name="filter" value="index"/>
+                    <conditional name="index">
+                        <param name="format" value="text"/>
+                        <param name="text" value="Gata2,EKLF"/>
+                    </conditional>
+                </conditional>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="640 × 2"/>
+            </assert_stdout>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/cell_type"/>
+                    <has_h5_keys keys="uns/highlights"/>
+                    <has_h5_keys keys="uns/iroot"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <!-- test 5 -->
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="filter_any"/>
+                <param name="var_obs" value="obs"/>
+                <conditional name="filter">
+                    <param name="filter" value="key"/>
+                    <param name="key" value="cell_type"/>
+                    <conditional name="filter_key">
+                        <param name="type" value="text"/>
+                        <param name="filter" value="equal"/>
+                        <param name="value" value="progenitor"/>
+                    </conditional>
+                </conditional>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="320 × 11"/>
+            </assert_stdout>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/cell_type"/>
+                    <has_h5_keys keys="uns/highlights"/>
+                    <has_h5_keys keys="uns/iroot"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!--  test 6 -->
         <!-- Fails to write to anndata after tl.filter_rank_genes_groups
              Issue has been reported here: https://github.com/scverse/anndata/issues/726
              The current fix is: del adata.uns['rank_genes_groups_filtered']  -->
@@ -511,7 +673,7 @@
             </output>
         </test>
 
-        <!-- test 5 -->
+        <!-- test 7 -->
         <test expect_num_outputs="2">
             <param name="adata" value="blobs.h5ad"/>
             <conditional name="method">
@@ -539,7 +701,7 @@
             </output>
         </test>
 
-        <!-- test 6 -->
+        <!-- test 8 -->
         <test expect_num_outputs="2">
             <param name="adata" value="krumsiek11.h5ad"/>
             <conditional name="method">
@@ -570,7 +732,7 @@
             </output>
         </test>
 
-        <!-- test 7 -->
+        <!-- test 9 -->
         <test expect_num_outputs="2">
             <param name="adata" value="krumsiek11.h5ad"/>
             <conditional name="method">
@@ -600,8 +762,8 @@
             </output>
         </test>
 
-        <!-- test 8 -->
-        <test expect_num_outputs="2">    
+        <!-- test 10 -->
+        <test expect_num_outputs="2">
             <param name="adata" value="krumsiek11.h5ad"/>
             <conditional name="method">
                 <param name="method" value="pp.subsample"/>
@@ -630,7 +792,7 @@
             </output>
         </test>
 
-        <!-- test 9 -->
+        <!-- test 11 -->
         <test expect_num_outputs="2">
             <param name="adata" value="random-randint.h5ad"/>
             <conditional name="method">
@@ -657,7 +819,7 @@
             </output>
         </test>
 
-        <!-- test 10 -->
+        <!-- test 12 -->
         <test expect_num_outputs="2">
             <param name="adata" value="random-randint.h5ad"/>
             <conditional name="method">
@@ -686,7 +848,7 @@
             </output>
         </test>
 
-        <!-- test 10 -->
+        <!-- test 13 -->
         <test expect_num_outputs="2">
             <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad"/>
             <conditional name="method">
@@ -716,7 +878,7 @@
             </output>
         </test>
 
-        <!-- test 11 -->
+        <!-- test 14 -->
         <test expect_num_outputs="2">
             <param name="adata" value="krumsiek11.h5ad"/>
             <conditional name="method">