comparison remove_confounders.xml @ 1:a89ee42625ad draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author iuc
date Wed, 16 Oct 2019 06:30:25 -0400
parents 9ca360dde8e3
children 94c8f42efc47
comparison
equal deleted inserted replaced
0:9ca360dde8e3 1:a89ee42625ad
1 <tool id="scanpy_remove_confounders" name="Remove confounders with scanpy" version="@version@"> 1 <tool id="scanpy_remove_confounders" name="Remove confounders" version="@version@">
2 <description></description> 2 <description>with scanpy</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 <xml name="score_genes_params">
6 <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/>
7 <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/>
8 <expand macro="param_use_raw"/>
9 </xml>
10 <token name="@CMD_score_genes_inputs@"><![CDATA[
11 n_bins=$method.n_bins,
12 random_state=$method.random_state,
13 use_raw=$method.use_raw,
14 copy=False
15 ]]></token>
16 </macros> 5 </macros>
17 <expand macro="requirements"/> 6 <expand macro="requirements"/>
18 <command detect_errors="exit_code"><![CDATA[ 7 <command detect_errors="exit_code"><![CDATA[
19 @CMD@ 8 @CMD@
20 ]]></command> 9 ]]></command>
24 @CMD_read_inputs@ 13 @CMD_read_inputs@
25 14
26 #if $method.method == "pp.regress_out" 15 #if $method.method == "pp.regress_out"
27 sc.pp.regress_out( 16 sc.pp.regress_out(
28 adata=adata, 17 adata=adata,
29 keys='$method.reg_keys', 18 #set $keys = [str(x.strip()) for x in str($method.keys).split(',')]
19 keys=$keys,
30 copy=False) 20 copy=False)
31 #elif $method.method == "tl.score_genes" 21
32 sc.tl.score_genes( 22 #else if $method.method == "pp.mnn_correct"
33 adata=adata, 23 #for i, filepath in enumerate($methods.extra_adata)
34 #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')] 24 adata_$i = ad.read('$filepath')
35 gene_list=$gene_list, 25 #end for
36 ctrl_size=$method.ctrl_size, 26
37 score_name='$method.score_name', 27 sc.pp.mnn_correct(
38 #if $method.gene_pool 28 adata,
39 #set $gene_pool = [str(x.strip()) for x in $method.gene_pool.split(',')] 29 #for i, filepath in enumerate($methods.extra_adata)
40 gene_pool=$gene_pool, 30 adata_$i,
31 #end for
32 #if str($methods.var_subset) != ''
33 #set $var_subset=([x.strip() for x in str($method.var_subset).split(',')])
34 var_subset=$var_subset,
41 #end if 35 #end if
42 @CMD_score_genes_inputs@) 36 batch_key='$method.batch_key',
43 adata.obs.to_csv('$obs', sep='\t') 37 index_unique='$method.index_unique'
44 #elif $method.method == "tl.score_genes_cell_cycle" 38 #if str($methods.batch_categories) != ''
45 sc.tl.score_genes_cell_cycle( 39 #set $batch_categories=([x.strip() for x in str($method.batch_categories).split(',')])
46 adata=adata, 40 batch_categories=$batch_categories,
47 #set $s_genes = [str(x.strip()) for x in $method.s_genes.split(',')] 41 #end if
48 s_genes=$s_genes, 42 k=$method.k,
49 #set $g2m_genes = [str(x.strip()) for x in $method.g2m_genes.split(',')] 43 sigma=$method.sigma,
50 g2m_genes=$g2m_genes, 44 cos_norm_in=$method.cos_norm_in,
51 @CMD_score_genes_inputs@) 45 cos_norm_out=$method.cos_norm_out,
52 adata.obs.to_csv('$obs', sep='\t') 46 svd_dim=$method.svd_dim,
47 var_adj=$method.var_adj,
48 compute_angle=$method.compute_angle,
49 mnn_order='$method.mnn_order',
50 svd_mode='$method.svd_mode',
51 do_concatenate=True,
52 save_raw=True,
53 n_jobs=\${GALAXY_SLOTS:-4})
54
55 #else if $method.method == "pp.combat"
56 sc.pp.combat(
57 adata,
58 key='$method.key',
59 inplace=True)
60
53 #end if 61 #end if
54 62
55 @CMD_anndata_write_outputs@ 63 @CMD_anndata_write_outputs@
56 ]]></configfile> 64 ]]></configfile>
57 </configfiles> 65 </configfiles>
58 <inputs> 66 <inputs>
59 <expand macro="inputs_anndata"/> 67 <expand macro="inputs_anndata"/>
60 <conditional name="method"> 68 <conditional name="method">
61 <param argument="method" type="select" label="Method used for plotting"> 69 <param argument="method" type="select" label="Method used for plotting">
62 <option value="pp.regress_out">Regress out unwanted sources of variation, using `pp.regress_out`</option> 70 <option value="pp.regress_out">Regress out unwanted sources of variation, using `pp.regress_out`</option>
63 <!--<option value="pp.mnn_correct">, using `pp.mnn_correct`</option>!--> 71 <option value="pp.mnn_correct">Correct batch effects by matching mutual nearest neighbors, using `pp.mnn_correct`</option>
64 <!--<option value="pp.dca">, using `pp.mnn_correct`</option>!--> 72 <option value="pp.combat">Correct batch effects with ComBat function, using `pp.combat`</option>
65 <!--<option value="pp.magic">, using `pp.magic`</option>!-->
66 <!--<option value="tl.sim">, using `tl.sim`</option>!-->
67 <!--<option value="pp.calculate_qc_metrics">, using `pp.calculate_qc_metrics`</option>!-->
68 <option value="tl.score_genes">Score a set of genes, using `tl.score_genes`</option>
69 <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using `tl.score_genes_cell_cycle`</option>
70 <!--<option value="tl.cyclone">, using `tl.cyclone`</option>!-->
71 <!--<option value="tl.andbag">, using `tl.andbag`</option>!-->
72 </param> 73 </param>
73 <when value="pp.regress_out"> 74 <when value="pp.regress_out">
74 <param argument="reg_keys" type="text" value="" label="Keys for observation annotation on which to regress on" help=""/> 75 <param argument="keys" type="text" value="" label="Keys for observation annotation on which to regress on" help="Keys separated by a comma"/>
75 </when> 76 </when>
76 <when value="tl.score_genes"> 77 <when value="pp.mnn_correct">
77 <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/> 78 <param name="extra_adata" type="data" multiple="true" optional="true" format="h5ad" label="Extra annotated data matrix" help="They should have same number of variables."/>
78 <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled" 79 <param argument="var_subset" type="text" value="" optional="true" label="The subset of vars to be used when performing MNN correction" help="List of comma-separated key from `.var_names`. If not set, all vars are used"/>
79 help="If `len(gene_list)` is not too low, you can set `ctrl_size=len(gene_list)`."/> 80 <param argument="batch_key" type="text" value="batch" label="Batch key for the concatenate"/>
80 <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set" 81 <param name="index_unique" type="select" label="Separator to join the existing index names with the batch category" help="Leave it empty to keep existing indices">
81 help="Default is all genes. Genes separated by a comma"/> 82 <option value="-">-</option>
82 <expand macro="score_genes_params"/> 83 <option value="_">_</option>
83 <param argument="score_name" type="text" value="score" label="Name of the field to be added in `.obs`" help=""/> 84 <option value=" "> </option>
85 <option value="/">/</option>
86 </param>
87 <param argument="batch_categories" type="text" value="" optional="true" label="Batch categories for the concatenate" help="List of comma-separated key"/>
88 <param argument="k" type="integer" value="20" label="Number of mutual nearest neighbors"/>
89 <param argument="sigma" type="float" value="1" label="The bandwidth of the Gaussian smoothing kernel used to compute the correction vectors"/>
90 <param argument="cos_norm_in" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed on the input data prior to calculating distances between cells?"/>
91 <param argument="cos_norm_out" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed prior to computing corrected expression values?"/>
92 <param argument="svd_dim" type="integer" value="" optional="true" label="Number of dimensions to use for summarizing biological substructure within each batch" help="If not set, biological components will not be removed from the correction vectors."/>
93 <param argument="var_adj" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Adjust variance of the correction vectors?" help="This step takes most computing time."/>
94 <param argument="compute_angle" type="boolean" truevalue="True" falsevalue="False" checked="false" label="compute the angle between each cell’s correction vector and the biological subspace of the reference batch?"/>
95 <param argument="mnn_order" type="text" value="" optional="true" label="The order in which batches are to be corrected" help="List of comma-separated key. If not set, datas are corrected sequentially"/>
96 <param name="svd_mode" type="select" label="SVD mode">
97 <option value="svd">svd: SVD using a non-randomized SVD-via-ID algorithm</option>
98 <option value="rsvd" selected="true">rsvd: SVD using a randomized SVD-via-ID algorithm</option>
99 <option value="irlb">irlb: truncated SVD by implicitly restarted Lanczos bidiagonalization</option>
100 </param>
84 </when> 101 </when>
85 <when value="tl.score_genes_cell_cycle"> 102 <when value="pp.combat">
86 <param name="s_genes" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma"/> 103 <param argument="key" type="text" value="batch" label="Key to a categorical annotation from adata.obs that will be used for batch effect removal"/>
87 <param name="g2m_genes" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma"/>
88 <expand macro="score_genes_params"/>
89 </when> 104 </when>
90 </conditional> 105 </conditional>
91 <expand macro="anndata_output_format"/>
92 </inputs> 106 </inputs>
93 <outputs> 107 <outputs>
94 <expand macro="anndata_outputs"/> 108 <expand macro="anndata_outputs"/>
95 <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Observations annotation">
96 <filter>method['method'] == 'tl.score_genes' or method['method'] == 'tl.score_genes_cell_cycle'</filter>
97 </data>
98 </outputs> 109 </outputs>
99 <tests> 110 <tests>
100 <test> 111 <test>
101 <conditional name="input"> 112 <!-- test 1 -->
102 <param name="format" value="h5ad" /> 113 <param name="adata" value="krumsiek11.h5ad" />
103 <param name="adata" value="krumsiek11.h5ad" />
104 </conditional>
105 <conditional name="method"> 114 <conditional name="method">
106 <param name="method" value="pp.regress_out"/> 115 <param name="method" value="pp.regress_out"/>
116 <param name="keys" value="cell_type"/>
117 </conditional>
118 <assert_stdout>
119 <has_text_matching expression="sc.pp.regress_out"/>
120 <has_text_matching expression="keys=\['cell_type'\]"/>
121 </assert_stdout>
122 <output name="anndata_out" file="pp.regress_out.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
123 </test>
124 <!--<test>
125 < test 2 >
126 <param name="adata" value="krumsiek11.h5ad" />
127 <conditional name="method">
128 <param name="method" value="pp.mnn_correct"/>
107 <param name="reg_keys" value="cell_type"/> 129 <param name="reg_keys" value="cell_type"/>
108 </conditional> 130 </conditional>
109 <param name="anndata_output_format" value="h5ad" />
110 <assert_stdout> 131 <assert_stdout>
111 <has_text_matching expression="sc.pp.regress_out"/> 132 <has_text_matching expression="sc.pp.mnn_correct"/>
112 <has_text_matching expression="keys='cell_type'"/> 133 <has_text_matching expression="keys='cell_type'"/>
113 </assert_stdout> 134 </assert_stdout>
114 <output name="anndata_out_h5ad" file="pp.regress_out.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> 135 <output name="anndata_out" file="pp.mnn_correct.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
115 </test> 136 </test>-->
116 <test> 137 <test>
117 <conditional name="input"> 138 <!-- test 2 -->
118 <param name="format" value="h5ad" /> 139 <param name="adata" value="blobs.h5ad" />
119 <param name="adata" value="krumsiek11.h5ad" /> 140 <conditional name="method">
141 <param name="method" value="pp.combat"/>
142 <param name="key" value="blobs"/>
120 </conditional> 143 </conditional>
121 <conditional name="method">
122 <param name="method" value="tl.score_genes"/>
123 <param name="gene_list" value="Gata2, Fog1"/>
124 <param name="ctrl_size" value="2"/>
125 <param name="n_bins" value="2"/>
126 <param name="random_state" value="2"/>
127 <param name="use_raw" value="False"/>
128 <param name="score_name" value="score"/>
129 </conditional>
130 <param name="anndata_output_format" value="h5ad"/>
131 <assert_stdout> 144 <assert_stdout>
132 <has_text_matching expression="sc.tl.score_genes" /> 145 <has_text_matching expression="sc.pp.combat"/>
133 <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" /> 146 <has_text_matching expression="key='blobs'"/>
134 <has_text_matching expression="ctrl_size=2" />
135 <has_text_matching expression="score_name='score'" />
136 <has_text_matching expression="n_bins=2" />
137 <has_text_matching expression="random_state=2" />
138 <has_text_matching expression="use_raw=False" />
139 <has_text_matching expression="copy=False" />
140 </assert_stdout> 147 </assert_stdout>
141 <output name="anndata_out_h5ad" file="tl.score_genes.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> 148 <output name="anndata_out" file="pp.combat.blobs.h5ad" ftype="h5ad" compare="sim_size"/>
142 <output name="obs" file="tl.score_genes.krumsiek11.obs.tabular" ftype="tabular" compare="sim_size"/>
143 </test>
144 <test>
145 <conditional name="input">
146 <param name="format" value="h5ad" />
147 <param name="adata" value="krumsiek11.h5ad" />
148 </conditional>
149 <conditional name="method">
150 <param name="method" value="tl.score_genes_cell_cycle"/>
151 <param name="s_genes" value="Gata2, Fog1, EgrNab"/>
152 <param name="g2m_genes" value="Gata2, Fog1, EgrNab"/>
153 <param name="n_bins" value="2"/>
154 <param name="random_state" value="1"/>
155 <param name="use_raw" value="False"/>
156 </conditional>
157 <param name="anndata_output_format" value="h5ad"/>
158 <assert_stdout>
159 <has_text_matching expression="sc.tl.score_genes_cell_cycle"/>
160 <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
161 <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
162 <has_text_matching expression="n_bins=2"/>
163 <has_text_matching expression="random_state=1"/>
164 <has_text_matching expression="use_raw=False"/>
165 </assert_stdout>
166 <output name="anndata_out_h5ad" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
167 <output name="obs" file="tl.score_genes_cell_cycle.krumsiek11.obs.tabular" ftype="tabular" compare="sim_size"/>
168 </test> 149 </test>
169 </tests> 150 </tests>
170 <help><![CDATA[ 151 <help><![CDATA[
171 Regress out unwanted sources of variation, using `pp.regress_out` 152 Regress out unwanted sources of variation, using `pp.regress_out`
172 ================================================================= 153 =================================================================
173 154
174 Regress out unwanted sources of variation, using simple linear regression. This is 155 Regress out unwanted sources of variation, using simple linear regression. This is
175 inspired by Seurat's `regressOut` function in R. 156 inspired by Seurat's `regressOut` function in R.
176 157
177 More details on the `scanpy documentation 158 More details on the `scanpy documentation
178 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.regress_out.html#scanpy.api.pp.regress_out>`__ 159 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.regress_out.html>`__
179 160
180 Score a set of genes, using `tl.score_genes` 161 Correct batch effects by matching mutual nearest neighbors, using `pp.mnn_correct`
181 ============================================ 162 ==================================================================================
182 163
183 The score is the average expression of a set of genes subtracted with the 164 This uses the implementation of mnnpy. Depending on do_concatenate, it returns AnnData objects in the
184 average expression of a reference set of genes. The reference set is 165 original order containing corrected expression values or a concatenated matrix or AnnData object.
185 randomly sampled from the `gene_pool` for each binned expression value.
186 166
187 This reproduces the approach in Seurat (Satija et al, 2015) and has been implemented 167 Be reminded that it is not advised to use the corrected data matrices for differential expression testing.
188 for Scanpy by Davide Cittaro.
189 168
190 More details on the `scanpy documentation 169 More details on the `scanpy documentation
191 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.score_genes.html#scanpy.api.tl.score_genes>`__ 170 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.api.pp.mnn_correct.html>`__
192 171
193 Score cell cycle genes, using `tl.score_genes_cell_cycle`
194 =========================================================
195 172
196 Given two lists of genes associated to S phase and G2M phase, calculates 173 Correct batch effects with ComBat function (`pp.combat`)
197 scores and assigns a cell cycle phase (G1, S or G2M). See 174 ========================================================
198 `score_genes` for more explanation. 175
176 Corrects for batch effects by fitting linear models, gains statistical power via an EB framework where information is borrowed across genes. This uses the implementation of ComBat
199 177
200 More details on the `scanpy documentation 178 More details on the `scanpy documentation
201 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.score_genes_cell_cycle.html#scanpy.api.tl.score_genes_cell_cycle>`__ 179 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.combat.html>`__
180
181
202 ]]></help> 182 ]]></help>
203 <expand macro="citations"/> 183 <expand macro="citations"/>
204 </tool> 184 </tool>