Repository 'scanpy_normalize'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/scanpy_normalize

Changeset 13:7b9fafe32c86 (2024-03-14)
Previous changeset 12:94c19fb1281c (2023-11-17) Next changeset 14:d844935c906c (2024-05-18)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 3974c0ff86bb1d6633281f29dfb4c605a74e1e6e
modified:
macros.xml
normalize.xml
added:
test-data/external.pp.magic.all_genes.krumsiek11.h5ad
test-data/external.pp.magic.pca_only.krumsiek11.h5ad
b
diff -r 94c19fb1281c -r 7b9fafe32c86 macros.xml
--- a/macros.xml Fri Nov 17 09:16:03 2023 +0000
+++ b/macros.xml Thu Mar 14 15:37:29 2024 +0000
b
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">1.9.6</token>
-    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@VERSION_SUFFIX@">2</token>
     <token name="@profile@">22.05</token>
     <xml name="requirements">
         <requirements>
@@ -11,6 +11,7 @@
             <requirement type="package" version="1.5.3">pandas</requirement>
             <requirement type="package" version="3.7">matplotlib</requirement>
             <requirement type="package" version="0.12.2">seaborn</requirement>
+            <requirement type="package" version="3.0.0">magic-impute</requirement>
             <yield />
         </requirements>
     </xml>
b
diff -r 94c19fb1281c -r 7b9fafe32c86 normalize.xml
--- a/normalize.xml Fri Nov 17 09:16:03 2023 +0000
+++ b/normalize.xml Thu Mar 14 15:37:29 2024 +0000
[
b'@@ -1,5 +1,5 @@\n <tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">\n-    <description>with scanpy</description>\n+    <description>and impute with scanpy</description>\n     <macros>\n         <import>macros.xml</import>\n     </macros>\n@@ -65,6 +65,29 @@\n     plot=False,\n     copy=False)\n \n+#else if $method.method == "external.pp.magic"\n+sc.external.pp.magic(\n+    adata=adata,\n+    name_list=\'$method.name_list\',\n+    knn=$method.knn,\n+    #if str($method.decay) != \'\'\n+    decay=$method.decay,\n+    #end if\n+    #if str($method.knn_max) != \'\'\n+    knn_max=$method.knn_max,\n+    #end if\n+    #if $method.t == -1\n+    t=\'auto\',\n+    #else\n+    t=$method.t,\n+    #end if\n+    #if str($method.n_pca) != \'\'\n+    n_pca=$method.n_pca,\n+    #end if\n+    solver=\'$method.solver\',\n+    knn_dist=\'$method.knn_dist\',\n+    random_state=$method.random_state,\n+    copy=False)\n #end if\n \n @CMD_anndata_write_outputs@\n@@ -79,6 +102,7 @@\n                 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. 
(2017), using \'pp.recipe_zheng17\'</option>\n                 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using \'pp.recipe_weinreb17\'</option>\n                 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using \'pp.recipe_seurat\'</option>\n+                <option value="external.pp.magic">Denoising using Markov Affinity-based Graph Imputation of Cells (MAGIC) API \'external.pp.magic\'</option>\n             </param>\n             <when value="pp.normalize_total">\n                 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>\n@@ -119,6 +143,29 @@\n             <when value="pp.recipe_seurat">\n                 <expand macro="param_log"/>\n             </when>\n+            <when value="external.pp.magic">\n+                <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory">\n+                    <option value="all_genes">All genes</option>\n+                    <option value="pca_only">PCA only</option>\n+                </param>\n+                <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/>\n+                <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails" \n+                    help="If not set, alpha decaying kernel is not used" />\n+                <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection"\n+                    help="If not set, will be set to 3 * knn" />\n+                <param argument="t" type="integer" min="-1" value="3" label="Power to which the 
diffusion operator is powered. This sets the level of diffusion"\n+                    help="If \xe2\x80\x98-1\xe2\x80\x99, this parameter is selected according to the Procrustes disparity of the diffused data." />\n+                <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods"\n+                    help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." />\n+                <param name="solver" type="select" label="Which solver to use" help="Selecting all genes may require a large amount of memory">\n+                    <option value="exact">"exact", the implementation described in van Dijk et al. (2018) </option>\n+                    <option value="approximate">"approximate", is faster that performs imputation in the PCA space and then projects back to t'..b'              <expand macro="distance_metric_options"/>\n+                </param>\n+                <expand macro="param_random_state"/>\n+            </when>\n         </conditional>\n         <expand macro="inputs_common_advanced"/>\n     </inputs>\n@@ -217,6 +264,53 @@\n             </output>\n             <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/>\n         </test>\n+        <test expect_num_outputs="2">\n+            <!-- test 5 -->\n+            <param name="adata" value="krumsiek11.h5ad" />\n+            <conditional name="method">\n+                <param name="method" value="external.pp.magic"/>\n+                <param name="name_list" value="all_genes"/>\n+                <param name="t" value="-1"/>\n+                <param name="n_pca" value="5"/>\n+            </conditional>\n+            <section name="advanced_common">\n+                <param name="show_log" value="true" />\n+            
</section>\n+            <output name="hidden_output">\n+                <assert_contents>\n+                    <has_text_matching expression="external.pp.magic"/>\n+                    <has_text_matching expression="name_list=\'all_genes\'"/>\n+                    <has_text_matching expression="t=\'auto\'"/>\n+                    <has_text_matching expression="n_pca=5"/>\n+                </assert_contents>\n+            </output>\n+            <output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>\n+        </test>\n+        <test expect_num_outputs="2">\n+            <!-- test 6 -->\n+            <param name="adata" value="krumsiek11.h5ad" />\n+            <conditional name="method">\n+                <param name="method" value="external.pp.magic"/>\n+                <param name="name_list" value="pca_only"/>\n+                <param name="t" value="3"/>\n+                <param name="n_pca" value="5"/>\n+            </conditional>\n+            <section name="advanced_common">\n+                <param name="show_log" value="true" />\n+            </section>\n+            <output name="hidden_output">\n+                <assert_contents>\n+                    <has_text_matching expression="external.pp.magic"/>\n+                    <has_text_matching expression="name_list=\'pca_only\'"/>\n+                    <has_text_matching expression="t=3"/>\n+                    <has_text_matching expression="n_pca=5"/>\n+                </assert_contents>\n+            </output>\n+            <output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>\n+            <assert_stdout>\n+                <has_text text="X_magic"/>\n+            </assert_stdout>\n+        </test>\n     </tests>\n     <help><![CDATA[\n Normalize total counts per cell (`pp.normalize_per_cell`)\n@@ -269,6 +363,20 @@\n More details on the `scanpy documentation\n 
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_seurat.html>`__\n \n+\n+Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`)\n+============================================================================================================\n+\n+MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold.\n+\n+The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018). \n+\n+- Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability.\n+- Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements.\n+\n+More details on the `scanpy documentation\n+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.external.pp.magic.html>`__\n+\n     ]]></help>\n     <expand macro="citations"/>\n </tool>\n'
b
diff -r 94c19fb1281c -r 7b9fafe32c86 test-data/external.pp.magic.all_genes.krumsiek11.h5ad
b
Binary file test-data/external.pp.magic.all_genes.krumsiek11.h5ad has changed
b
diff -r 94c19fb1281c -r 7b9fafe32c86 test-data/external.pp.magic.pca_only.krumsiek11.h5ad
b
Binary file test-data/external.pp.magic.pca_only.krumsiek11.h5ad has changed