diff normalize.xml @ 1:a9f14e2d1655 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author iuc
date Wed, 16 Oct 2019 06:31:10 -0400
parents ed64c90a9b93
children 8e0f141c8c66
line wrap: on
line diff
--- a/normalize.xml	Mon Mar 04 10:16:12 2019 -0500
+++ b/normalize.xml	Wed Oct 16 06:31:10 2019 -0400
@@ -1,5 +1,5 @@
-<tool id="scanpy_normalize" name="Normalize with scanpy" version="@galaxy_version@">
-    <description></description>
+<tool id="scanpy_normalize" name="Normalize" version="@galaxy_version@">
+    <description>with scanpy</description>
     <macros>
         <import>macros.xml</import>
     </macros>
@@ -13,26 +13,36 @@
 @CMD_imports@
 @CMD_read_inputs@
 
-#if $method.method == "pp.normalize_per_cell"
-sc.pp.normalize_per_cell(
-    data=adata,
-    #if $method.counts_per_cell_after
-    counts_per_cell_after=$method.counts_per_cell_after,
+#if $method.method == "pp.normalize_total"
+sc.pp.normalize_total(
+    adata,
+    #if str($method.target_sum)!= ''
+    target_sum=$method.target_sum,
+    #end if
+    exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed,
+    #if $method.exclude_highly_expressed.exclude_highly_expressed == "True"
+    max_fraction=$method.exclude_highly_expressed.max_fraction,
     #end if
-    #if $method.counts_per_cell
-    counts_per_cell=np.loadtxt('$method.counts_per_cell'),
+    key_added='$method.key_added',
+    #if str($method.layers) != 'all'
+    layers=[x.strip() for x in '$method.layers'.split(',')],
+    #else
+    layers='$method.layers',
     #end if
-    key_n_counts='$method.key_n_counts',
-    copy=False)
-adata.obs.to_csv('$anndata_obs', sep='\t')
-#elif $method.method == "pp.recipe_zheng17"
+    #if str($method.layer_norm) != "None"
+        layer_norm='$method.layer_norm',
+    #end if
+    inplace=True)
+
+#else if $method.method == "pp.recipe_zheng17"
 sc.pp.recipe_zheng17(
     adata=adata,
     n_top_genes=$method.n_top_genes,
     log=$method.log,
     plot=False,
     copy=False)
-#elif $method.method == "pp.recipe_weinreb17"
+
+#else if $method.method == "pp.recipe_weinreb17"
 sc.pp.recipe_weinreb17(
     adata=adata,
     log=$method.log,
@@ -42,34 +52,14 @@
     svd_solver='$method.svd_solver',
     random_state=$method.random_state,
     copy=False)
-#elif $method.method == "pp.recipe_seurat"
+
+#else if $method.method == "pp.recipe_seurat"
 sc.pp.recipe_seurat(
     adata=adata,
     log=$method.log,
     plot=False,
     copy=False)
-#elif $method.method == "pp.log1p"
-sc.pp.log1p(
-    data=adata,
-    copy=False)
-#elif $method.method == "pp.scale"
-sc.pp.scale(
-    data=adata,
-    zero_center=$method.zero_center,
-    #if $method.max_value
-    max_value=$method.max_value,
-    #end if
-    copy=False)
-#elif $method.method == "pp.sqrt"
-sc.pp.sqrt(
-    data=adata,
-    copy=False)
-#elif $method.method == "pp.downsample_counts"
-sc.pp.downsample_counts(
-    adata=adata,
-    target_counts=$method.target_counts,
-    random_state=$method.random_state,
-    copy=False)
+
 #end if
 
 @CMD_anndata_write_outputs@
@@ -79,20 +69,31 @@
     <inputs>
         <expand macro="inputs_anndata"/>
         <conditional name="method">
-            <param argument="method" type="select" label="Method used for plotting">
-                <option value="pp.normalize_per_cell">Normalize total counts per cell, using `pp.normalize_per_cell`</option>
+            <param argument="method" type="select" label="Method used for normalization">
+                <option value="pp.normalize_total">Normalize counts per cell, using `pp.normalize_total`</option>
                 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using `pp.recipe_zheng17`</option>
                 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using `pp.recipe_weinreb17`</option>
                 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using `pp.recipe_seurat`</option>
-                <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option>
-                <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option>
-                <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option>
-                <option value="pp.downsample_counts">Downsample counts, using `pp.downsample_counts`</option>
             </param>
-            <when value="pp.normalize_per_cell">
-                <param argument="counts_per_cell_after" type="float" value="" optional="true" label="Counts per cell after" help="If not provided, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization."/>
-                <param argument="counts_per_cell" type="data" format="tabular,txt" optional="true" label="Precomputed counts per cell" help=""/>
-                <param argument="key_n_counts" type="text" value="n_counts" label="Name of the field in `adata.obs` where the total counts per cell will be stored" help=""/>
+            <when value="pp.normalize_total">
+                <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
+                <conditional name="exclude_highly_expressed">
+                    <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum">
+                        <option value="True">Yes</option>
+                        <option value="False" selected="true">No</option>
+                    </param>
+                    <when value="True">
+                        <param argument="max_fraction" type="float" value="0.05" label="Maximum fraction of the total counts" help="A gene is considered highly expressed, and is excluded from the computation of the normalization factor, if it has more than this fraction of the total counts in at least one cell."/>
+                    </when>
+                    <when value="False"/>
+                </conditional>
+                <param argument="key_added" type="text" value="n_counts" label="Name of the field in `adata.obs` where the normalization factor is stored" help=""/>
+                <param argument="layers" type="text" value="all" label="List of layers to normalize" help="'all' (lowercase) will normalize all layers. Otherwise, provide a comma-separated list of layer names."/>
+                <param argument="layer_norm" type="select" label="How to normalize layers?">
+                    <option value="None">None: after normalization, for each layer in layers each cell has a total count equal to the median of the total counts (cells) before normalization of the layer.</option>
+                    <option value="after">After: for each layer in layers each cell has a total count equal to target_sum.</option>
+                    <option value="X">X: for each layer in layers each cell has a total count equal to the median of total counts for observations (cells) of adata.X before normalization.</option>
+                </param>
+            </when>
             <when value="pp.recipe_zheng17">
                 <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/>
@@ -109,73 +110,50 @@
             <when value="pp.recipe_seurat">
                 <expand macro="param_log"/>
             </when>
-            <when value="pp.log1p"/>
-            <when value="pp.scale">
-                <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
-                    label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
-                <param argument="max_value" type="float" value="" optional="true" label="Maximum value"
-                    help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
-            </when>
-            <when value="pp.sqrt"/>
-            <when value="pp.downsample_counts">
-                <param argument="target_counts" type="integer" min="0" value="20000"
-                    label="Target number of counts for downsampling" help="Cells with more counts than 'target_counts' will be downsampled to have 'target_counts' counts."/>
-                <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/>
-            </when>
         </conditional>
-        <expand macro="anndata_output_format"/>
     </inputs>
     <outputs>
         <expand macro="anndata_outputs"/>
-        <data name="anndata_obs" format="tabular" label="${tool.name} on ${on_string}: Annotation of observations">
-            <filter>method['method'] == 'pp.normalize_per_cell'</filter>
-        </data>
     </outputs>
     <tests>
         <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="krumsiek11.h5ad" />
-            </conditional>
+            <!-- test 1 -->
+            <param name="adata" value="krumsiek11.h5ad" />
             <conditional name="method">
-                <param name="method" value="pp.normalize_per_cell"/>
-                <param name="counts_per_cell_after" value="2"/>
-                <param name="counts_per_cell" value="krumsiek11_counts_per_cell"/>
-                <param name="key_n_counts" value="n_counts"/>
+                <param name="method" value="pp.normalize_total"/>
+                <conditional name="exclude_highly_expressed">
+                    <param name="exclude_highly_expressed" value="False"/>
+                </conditional>
+                <param name="key_added" value="n_counts"/>
+                <param name="layers" value="all"/>
+                <param name="layer_norm" value="None"/>
             </conditional>
-            <param name="anndata_output_format" value="h5ad"/>
             <assert_stdout>
-                <has_text_matching expression="sc.pp.normalize_per_cell"/>
-                <has_text_matching expression="counts_per_cell_after=2.0"/>
-                <has_text_matching expression="counts_per_cell=np.loadtxt"/>
-                <has_text_matching expression="key_n_counts='n_counts'"/>
+                <has_text_matching expression="sc.pp.normalize_total"/>
+                <has_text_matching expression="exclude_highly_expressed=False"/>
+                <has_text_matching expression="key_added='n_counts'"/>
+                <has_text_matching expression="layers='all'"/>
             </assert_stdout>
-            <output name="anndata_out_h5ad" file="pp.normalize_per_cell.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
-            <output name="anndata_obs" file="pp.normalize_per_cell.obs.krumsiek11.tabular"/>
+            <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
         <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="random-randint.h5ad"/>
-            </conditional>
+            <!-- test 2 -->
+            <param name="adata" value="random-randint.h5ad"/>
             <conditional name="method">
                 <param name="method" value="pp.recipe_zheng17"/>
                 <param name="n_top_genes" value="1000"/>
                 <param name="log" value="True"/>
             </conditional>
-            <param name="anndata_output_format" value="h5ad"/>
             <assert_stdout>
                 <has_text_matching expression="sc.pp.recipe_zheng17"/>
                 <has_text_matching expression="n_top_genes=1000"/>
                 <has_text_matching expression="log=True"/>
             </assert_stdout>
-            <output name="anndata_out_h5ad" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5" compare="sim_size"/>
+            <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
         <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="paul15_subsample.h5ad" />
-            </conditional>
+            <!-- test 3 -->
+            <param name="adata" value="paul15_subsample.h5ad" />
             <conditional name="method">
                 <param name="method" value="pp.recipe_weinreb17"/>
                 <param name="log" value="True"/>
@@ -185,7 +163,6 @@
                 <param name="svd_solver" value="randomized"/>
                 <param name="random_state" value="0"/>
             </conditional>
-            <param name="anndata_output_format" value="h5ad" />
             <assert_stdout>
                 <has_text_matching expression="sc.pp.recipe_weinreb17"/>
                 <has_text_matching expression="log=True"/>
@@ -195,108 +172,22 @@
                 <has_text_matching expression="svd_solver='randomized'"/>
                 <has_text_matching expression="random_state=0"/>
             </assert_stdout>
-            <output name="anndata_out_h5ad" file="pp.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/>
+            <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
         <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" />
-            </conditional>
+            <!-- test 4 -->
+            <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" />
             <conditional name="method">
                 <param name="method" value="pp.recipe_seurat"/>
                 <param name="log" value="True"/>
             </conditional>
-            <param name="anndata_output_format" value="h5ad"/>
             <assert_stdout>
                 <has_text_matching expression="sc.pp.recipe_seurat"/>
                 <has_text_matching expression="log=True"/>
             </assert_stdout>
-            <output name="anndata_out_h5ad" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5" compare="sim_size"/>
-        </test>
-        <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="krumsiek11.h5ad" />
-            </conditional>
-            <conditional name="method">
-                <param name="method" value="pp.log1p"/>
-            </conditional>
-            <param name="anndata_output_format" value="h5ad" />
-            <assert_stdout>
-                <has_text_matching expression="sc.pp.log1p"/>
-            </assert_stdout>
-            <output name="anndata_out_h5ad" file="pp.log1p.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
-        </test>
-        <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="krumsiek11.h5ad" />
-            </conditional>
-            <conditional name="method">
-                <param name="method" value="pp.scale"/>
-                <param name="zero_center" value="true"/>
-            </conditional>
-            <param name="anndata_output_format" value="h5ad" />
-            <assert_stdout>
-                <has_text_matching expression="sc.pp.scale"/>
-                <has_text_matching expression="zero_center=True"/>
-            </assert_stdout>
-            <output name="anndata_out_h5ad" file="pp.scale.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+            <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
-        <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="krumsiek11.h5ad" />
-            </conditional>
-            <conditional name="method">
-                <param name="method" value="pp.scale"/>
-                <param name="zero_center" value="true"/>
-                <param name="max_value" value="10"/>
-            </conditional>
-            <param name="anndata_output_format" value="h5ad" />
-            <assert_stdout>
-                <has_text_matching expression="sc.pp.scale"/>
-                <has_text_matching expression="zero_center=True"/>
-                <has_text_matching expression="max_value=10.0"/>
-            </assert_stdout>
-            <output name="anndata_out_h5ad" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
-        </test>
-        <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="krumsiek11.h5ad" />
-            </conditional>
-            <conditional name="method">
-                <param name="method" value="pp.sqrt"/>
-            </conditional>
-            <param name="anndata_output_format" value="h5ad" />
-            <assert_stdout>
-                <has_text_matching expression="sc.pp.sqrt"/>
-            </assert_stdout>
-            <output name="anndata_out_h5ad" file="pp.sqrt.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
-        </test>
-        <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="random-randint.h5ad" />
-            </conditional>
-            <conditional name="method">
-                <param name="method" value="pp.downsample_counts"/>
-                <param name="target_counts" value="20000"/>
-                <param name="random_state" value="0"/>
-            </conditional>
-            <param name="anndata_output_format" value="h5ad" />
-            <assert_stdout>
-                <has_text_matching expression="sc.pp.downsample_counts"/>
-                <has_text_matching expression="target_counts=20000"/>
-                <has_text_matching expression="random_state=0"/>
-            </assert_stdout>
-            <output name="anndata_out_h5ad" ftype="h5">
-                <assert_contents>
-                    <has_h5_keys keys="X, obs, var" />
-                </assert_contents>
-            </output>
-        </test>
+        
     </tests>
     <help><![CDATA[
 Normalize total counts per cell (`pp.normalize_per_cell`)
@@ -308,7 +199,7 @@
 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.
 
 More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.normalize_per_cell.html>`__
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.normalize_per_cell.html>`__
 
 
 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
@@ -327,7 +218,7 @@
 - scale to unit variance and shift to zero mean
 
 More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_zheng17.html>`__
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_zheng17.html>`__
 
 
 Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`)
@@ -336,7 +227,7 @@
 Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
 
 More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_weinreb17.html>`__
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_weinreb17.html>`__
 
 
 Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`)
@@ -347,33 +238,7 @@
 Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
 
 More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_seurat.html>`__
-
-Logarithmize the data matrix (`pp.log1p`)
-=========================================
-
-More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.log1p.html>`__
-
-Scale data to unit variance and zero mean (`pp.scale`)
-======================================================
-
-More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.scale.html>`__
-
-Computes the square root the data matrix (`pp.sqrt`)
-====================================================
-
-`X = sqrt(X)`
-
-Downsample counts (`pp.downsample_counts`)
-==========================================
-
-Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This
-has been implemented by M. D. Luecken.
-
-More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.downsample_counts.html>`__
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_seurat.html>`__
 
     ]]></help>
     <expand macro="citations"/>