diff remove_confounders.xml @ 0:9ca360dde8e3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author iuc
date Mon, 04 Mar 2019 10:16:47 -0500
parents
children a89ee42625ad
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/remove_confounders.xml	Mon Mar 04 10:16:47 2019 -0500
@@ -0,0 +1,204 @@
+<tool id="scanpy_remove_confounders" name="Remove confounders with scanpy" version="@version@">
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+        <xml name="score_genes_params">
+            <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/>
+            <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/>
+            <expand macro="param_use_raw"/>
+        </xml>
+        <token name="@CMD_score_genes_inputs@"><![CDATA[
+    n_bins=$method.n_bins,
+    random_state=$method.random_state,
+    use_raw=$method.use_raw,
+    copy=False
+        ]]></token>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+@CMD@
+      ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+@CMD_imports@
+@CMD_read_inputs@
+
+#if $method.method == "pp.regress_out"
+sc.pp.regress_out(
+   adata=adata,
+   keys='$method.reg_keys',
+   copy=False)
+#elif $method.method == "tl.score_genes"
+sc.tl.score_genes(
+    adata=adata,
+    #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')]
+    gene_list=$gene_list,
+    ctrl_size=$method.ctrl_size,
+    score_name='$method.score_name',
+    #if $method.gene_pool
+        #set $gene_pool = [str(x.strip()) for x in $method.gene_pool.split(',')]
+    gene_pool=$gene_pool,
+    #end if
+    @CMD_score_genes_inputs@)
+adata.obs.to_csv('$obs', sep='\t')
+#elif $method.method == "tl.score_genes_cell_cycle"
+sc.tl.score_genes_cell_cycle(
+    adata=adata,
+    #set $s_genes = [str(x.strip()) for x in $method.s_genes.split(',')]
+    s_genes=$s_genes,
+    #set $g2m_genes = [str(x.strip()) for x in $method.g2m_genes.split(',')]
+    g2m_genes=$g2m_genes,
+    @CMD_score_genes_inputs@)
+adata.obs.to_csv('$obs', sep='\t')
+#end if
+
+@CMD_anndata_write_outputs@
+]]></configfile>
+    </configfiles>
+    <inputs>
+        <expand macro="inputs_anndata"/>
+        <conditional name="method">
+            <param argument="method" type="select" label="Method used for plotting">
+                <option value="pp.regress_out">Regress out unwanted sources of variation, using `pp.regress_out`</option>
+                <!--<option value="pp.mnn_correct">, using `pp.mnn_correct`</option>!-->
+                <!--<option value="pp.dca">, using `pp.mnn_correct`</option>!-->
+                <!--<option value="pp.magic">, using `pp.magic`</option>!-->
+                <!--<option value="tl.sim">, using `tl.sim`</option>!-->
+                <!--<option value="pp.calculate_qc_metrics">, using `pp.calculate_qc_metrics`</option>!-->
+                <option value="tl.score_genes">Score a set of genes, using `tl.score_genes`</option>
+                <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using `tl.score_genes_cell_cycle`</option>
+                <!--<option value="tl.cyclone">, using `tl.cyclone`</option>!-->
+                <!--<option value="tl.andbag">, using `tl.andbag`</option>!-->
+            </param>
+            <when value="pp.regress_out">
+                <param argument="reg_keys" type="text" value="" label="Keys for observation annotation on which to regress on" help=""/>
+            </when>
+            <when value="tl.score_genes">
+                <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/>
+                <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled"
+                    help="If `len(gene_list)` is not too low, you can set `ctrl_size=len(gene_list)`."/>
+                <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set"
+                    help="Default is all genes. Genes separated by a comma"/>
+                <expand macro="score_genes_params"/>
+                <param argument="score_name" type="text" value="score" label="Name of the field to be added in `.obs`" help=""/>
+            </when>
+            <when value="tl.score_genes_cell_cycle">
+                <param name="s_genes" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma"/>
+                <param name="g2m_genes" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma"/>
+                <expand macro="score_genes_params"/>
+            </when>
+        </conditional>
+        <expand macro="anndata_output_format"/>
+    </inputs>
+    <outputs>
+        <expand macro="anndata_outputs"/>
+        <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Observations annotation">
+            <filter>method['method'] == 'tl.score_genes' or method['method'] == 'tl.score_genes_cell_cycle'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.regress_out"/>
+                <param name="reg_keys" value="cell_type"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad" />
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.regress_out"/>
+                <has_text_matching expression="keys='cell_type'"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="pp.regress_out.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+        </test>
+        <test>
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="tl.score_genes"/>
+                <param name="gene_list" value="Gata2, Fog1"/>
+                <param name="ctrl_size" value="2"/>
+                <param name="n_bins" value="2"/>
+                <param name="random_state" value="2"/>
+                <param name="use_raw" value="False"/>
+                <param name="score_name" value="score"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad"/>
+            <assert_stdout>
+                <has_text_matching expression="sc.tl.score_genes" />
+                <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" />
+                <has_text_matching expression="ctrl_size=2" />
+                <has_text_matching expression="score_name='score'" />
+                <has_text_matching expression="n_bins=2" />
+                <has_text_matching expression="random_state=2" />
+                <has_text_matching expression="use_raw=False" />
+                <has_text_matching expression="copy=False" />
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="tl.score_genes.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+            <output name="obs" file="tl.score_genes.krumsiek11.obs.tabular" ftype="tabular" compare="sim_size"/>
+        </test>
+        <test>
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="tl.score_genes_cell_cycle"/>
+                <param name="s_genes" value="Gata2, Fog1, EgrNab"/>
+                <param name="g2m_genes" value="Gata2, Fog1, EgrNab"/>
+                <param name="n_bins" value="2"/>
+                <param name="random_state" value="1"/>
+                <param name="use_raw" value="False"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad"/>
+            <assert_stdout>
+                <has_text_matching expression="sc.tl.score_genes_cell_cycle"/>
+                <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
+                <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
+                <has_text_matching expression="n_bins=2"/>
+                <has_text_matching expression="random_state=1"/>
+                <has_text_matching expression="use_raw=False"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+            <output name="obs" file="tl.score_genes_cell_cycle.krumsiek11.obs.tabular" ftype="tabular" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Regress out unwanted sources of variation, using `pp.regress_out`
+=================================================================
+
+Regress out unwanted sources of variation, using simple linear regression. This is 
+inspired by Seurat's `regressOut` function in R.
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.regress_out.html#scanpy.api.pp.regress_out>`__
+
+Score a set of genes, using `tl.score_genes`
+============================================
+
+The score is the average expression of a set of genes subtracted with the
+average expression of a reference set of genes. The reference set is
+randomly sampled from the `gene_pool` for each binned expression value.
+
+This reproduces the approach in Seurat (Satija et al, 2015) and has been implemented
+for Scanpy by Davide Cittaro.
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.score_genes.html#scanpy.api.tl.score_genes>`__
+
+Score cell cycle genes, using `tl.score_genes_cell_cycle`
+=========================================================
+
+Given two lists of genes associated to S phase and G2M phase, calculates
+scores and assigns a cell cycle phase (G1, S or G2M). See
+`score_genes` for more explanation.
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.score_genes_cell_cycle.html#scanpy.api.tl.score_genes_cell_cycle>`__
+    ]]></help>
+    <expand macro="citations"/>
+</tool>