Mercurial > repos > iuc > scanpy_remove_confounders
comparison remove_confounders.xml @ 1:a89ee42625ad draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author | iuc |
---|---|
date | Wed, 16 Oct 2019 06:30:25 -0400 |
parents | 9ca360dde8e3 |
children | 94c8f42efc47 |
comparison
equal
deleted
inserted
replaced
0:9ca360dde8e3 | 1:a89ee42625ad |
---|---|
1 <tool id="scanpy_remove_confounders" name="Remove confounders with scanpy" version="@version@"> | 1 <tool id="scanpy_remove_confounders" name="Remove confounders" version="@version@"> |
2 <description></description> | 2 <description>with scanpy</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 <xml name="score_genes_params"> | |
6 <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/> | |
7 <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/> | |
8 <expand macro="param_use_raw"/> | |
9 </xml> | |
10 <token name="@CMD_score_genes_inputs@"><![CDATA[ | |
11 n_bins=$method.n_bins, | |
12 random_state=$method.random_state, | |
13 use_raw=$method.use_raw, | |
14 copy=False | |
15 ]]></token> | |
16 </macros> | 5 </macros> |
17 <expand macro="requirements"/> | 6 <expand macro="requirements"/> |
18 <command detect_errors="exit_code"><![CDATA[ | 7 <command detect_errors="exit_code"><![CDATA[ |
19 @CMD@ | 8 @CMD@ |
20 ]]></command> | 9 ]]></command> |
24 @CMD_read_inputs@ | 13 @CMD_read_inputs@ |
25 | 14 |
26 #if $method.method == "pp.regress_out" | 15 #if $method.method == "pp.regress_out" |
27 sc.pp.regress_out( | 16 sc.pp.regress_out( |
28 adata=adata, | 17 adata=adata, |
29 keys='$method.reg_keys', | 18 #set $keys = [str(x.strip()) for x in str($method.keys).split(',')] |
19 keys=$keys, | |
30 copy=False) | 20 copy=False) |
31 #elif $method.method == "tl.score_genes" | 21 |
32 sc.tl.score_genes( | 22 #else if $method.method == "pp.mnn_correct" |
33 adata=adata, | 23 #for i, filepath in enumerate($methods.extra_adata) |
34 #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')] | 24 adata_$i = ad.read('$filepath') |
35 gene_list=$gene_list, | 25 #end for |
36 ctrl_size=$method.ctrl_size, | 26 |
37 score_name='$method.score_name', | 27 sc.pp.mnn_correct( |
38 #if $method.gene_pool | 28 adata, |
39 #set $gene_pool = [str(x.strip()) for x in $method.gene_pool.split(',')] | 29 #for i, filepath in enumerate($methods.extra_adata) |
40 gene_pool=$gene_pool, | 30 adata_$i, |
31 #end for | |
32 #if str($methods.var_subset) != '' | |
33 #set $var_subset=([x.strip() for x in str($method.var_subset).split(',')]) | |
34 var_subset=$var_subset, | |
41 #end if | 35 #end if |
42 @CMD_score_genes_inputs@) | 36 batch_key='$method.batch_key', |
43 adata.obs.to_csv('$obs', sep='\t') | 37 index_unique='$method.index_unique' |
44 #elif $method.method == "tl.score_genes_cell_cycle" | 38 #if str($methods.batch_categories) != '' |
45 sc.tl.score_genes_cell_cycle( | 39 #set $batch_categories=([x.strip() for x in str($method.batch_categories).split(',')]) |
46 adata=adata, | 40 batch_categories=$batch_categories, |
47 #set $s_genes = [str(x.strip()) for x in $method.s_genes.split(',')] | 41 #end if |
48 s_genes=$s_genes, | 42 k=$method.k, |
49 #set $g2m_genes = [str(x.strip()) for x in $method.g2m_genes.split(',')] | 43 sigma=$method.sigma, |
50 g2m_genes=$g2m_genes, | 44 cos_norm_in=$method.cos_norm_in, |
51 @CMD_score_genes_inputs@) | 45 cos_norm_out=$method.cos_norm_out, |
52 adata.obs.to_csv('$obs', sep='\t') | 46 svd_dim=$method.svd_dim, |
47 var_adj=$method.var_adj, | |
48 compute_angle=$method.compute_angle, | |
49 mnn_order='$method.mnn_order', | |
50 svd_mode='$method.svd_mode', | |
51 do_concatenate=True, | |
52 save_raw=True, | |
53 n_jobs=\${GALAXY_SLOTS:-4}) | |
54 | |
55 #else if $method.method == "pp.combat" | |
56 sc.pp.combat( | |
57 adata, | |
58 key='$method.key', | |
59 inplace=True) | |
60 | |
53 #end if | 61 #end if |
54 | 62 |
55 @CMD_anndata_write_outputs@ | 63 @CMD_anndata_write_outputs@ |
56 ]]></configfile> | 64 ]]></configfile> |
57 </configfiles> | 65 </configfiles> |
58 <inputs> | 66 <inputs> |
59 <expand macro="inputs_anndata"/> | 67 <expand macro="inputs_anndata"/> |
60 <conditional name="method"> | 68 <conditional name="method"> |
61 <param argument="method" type="select" label="Method used for plotting"> | 69 <param argument="method" type="select" label="Method used for plotting"> |
62 <option value="pp.regress_out">Regress out unwanted sources of variation, using `pp.regress_out`</option> | 70 <option value="pp.regress_out">Regress out unwanted sources of variation, using `pp.regress_out`</option> |
63 <!--<option value="pp.mnn_correct">, using `pp.mnn_correct`</option>!--> | 71 <option value="pp.mnn_correct">Correct batch effects by matching mutual nearest neighbors, using `pp.mnn_correct`</option> |
64 <!--<option value="pp.dca">, using `pp.mnn_correct`</option>!--> | 72 <option value="pp.combat">Correct batch effects with ComBat function, using `pp.combat`</option> |
65 <!--<option value="pp.magic">, using `pp.magic`</option>!--> | |
66 <!--<option value="tl.sim">, using `tl.sim`</option>!--> | |
67 <!--<option value="pp.calculate_qc_metrics">, using `pp.calculate_qc_metrics`</option>!--> | |
68 <option value="tl.score_genes">Score a set of genes, using `tl.score_genes`</option> | |
69 <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using `tl.score_genes_cell_cycle`</option> | |
70 <!--<option value="tl.cyclone">, using `tl.cyclone`</option>!--> | |
71 <!--<option value="tl.andbag">, using `tl.andbag`</option>!--> | |
72 </param> | 73 </param> |
73 <when value="pp.regress_out"> | 74 <when value="pp.regress_out"> |
74 <param argument="reg_keys" type="text" value="" label="Keys for observation annotation on which to regress on" help=""/> | 75 <param argument="keys" type="text" value="" label="Keys for observation annotation on which to regress on" help="Keys separated by a comma"/> |
75 </when> | 76 </when> |
76 <when value="tl.score_genes"> | 77 <when value="pp.mnn_correct"> |
77 <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/> | 78 <param name="extra_adata" type="data" multiple="true" optional="true" format="h5ad" label="Extra annotated data matrix" help="They should have same number of variables."/> |
78 <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled" | 79 <param argument="var_subset" type="text" value="" optional="true" label="The subset of vars to be used when performing MNN correction" help="List of comma-separated keys from `.var_names`. If not set, all vars are used"/> |
79 help="If `len(gene_list)` is not too low, you can set `ctrl_size=len(gene_list)`."/> | 80 <param argument="batch_key" type="text" value="batch" label="Batch key for the concatenate"/> |
80 <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set" | 81 <param name="index_unique" type="select" label="Separator to join the existing index names with the batch category" help="Leave it empty to keep existing indices"> |
81 help="Default is all genes. Genes separated by a comma"/> | 82 <option value="-">-</option> |
82 <expand macro="score_genes_params"/> | 83 <option value="_">_</option> |
83 <param argument="score_name" type="text" value="score" label="Name of the field to be added in `.obs`" help=""/> | 84 <option value=" "> </option> |
85 <option value="/">/</option> | |
86 </param> | |
87 <param argument="batch_categories" type="text" value="" optional="true" label="Batch categories for the concatenate" help="List of comma-separated keys"/> | |
88 <param argument="k" type="integer" value="20" label="Number of mutual nearest neighbors"/> | |
89 <param argument="sigma" type="float" value="1" label="The bandwidth of the Gaussian smoothing kernel used to compute the correction vectors"/> | |
90 <param argument="cos_norm_in" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed on the input data prior to calculating distances between cells?"/> | |
91 <param argument="cos_norm_out" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed prior to computing corrected expression values?"/> | |
92 <param argument="svd_dim" type="integer" value="" optional="true" label="Number of dimensions to use for summarizing biological substructure within each batch" help="If not set, biological components will not be removed from the correction vectors."/> | |
93 <param argument="var_adj" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Adjust variance of the correction vectors?" help="This step takes most computing time."/> | |
94 <param argument="compute_angle" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Compute the angle between each cell’s correction vector and the biological subspace of the reference batch?"/> | |
95 <param argument="mnn_order" type="text" value="" optional="true" label="The order in which batches are to be corrected" help="List of comma-separated keys. If not set, datasets are corrected sequentially"/> | |
96 <param name="svd_mode" type="select" label="SVD mode"> | |
97 <option value="svd">svd: SVD using a non-randomized SVD-via-ID algorithm</option> | |
98 <option value="rsvd" selected="true">rsvd: SVD using a randomized SVD-via-ID algorithm</option> | |
99 <option value="irlb">irlb: truncated SVD by implicitly restarted Lanczos bidiagonalization</option> | |
100 </param> | |
84 </when> | 101 </when> |
85 <when value="tl.score_genes_cell_cycle"> | 102 <when value="pp.combat"> |
86 <param name="s_genes" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma"/> | 103 <param argument="key" type="text" value="batch" label="Key to a categorical annotation from adata.obs that will be used for batch effect removal"/> |
87 <param name="g2m_genes" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma"/> | |
88 <expand macro="score_genes_params"/> | |
89 </when> | 104 </when> |
90 </conditional> | 105 </conditional> |
91 <expand macro="anndata_output_format"/> | |
92 </inputs> | 106 </inputs> |
93 <outputs> | 107 <outputs> |
94 <expand macro="anndata_outputs"/> | 108 <expand macro="anndata_outputs"/> |
95 <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Observations annotation"> | |
96 <filter>method['method'] == 'tl.score_genes' or method['method'] == 'tl.score_genes_cell_cycle'</filter> | |
97 </data> | |
98 </outputs> | 109 </outputs> |
99 <tests> | 110 <tests> |
100 <test> | 111 <test> |
101 <conditional name="input"> | 112 <!-- test 1 --> |
102 <param name="format" value="h5ad" /> | 113 <param name="adata" value="krumsiek11.h5ad" /> |
103 <param name="adata" value="krumsiek11.h5ad" /> | |
104 </conditional> | |
105 <conditional name="method"> | 114 <conditional name="method"> |
106 <param name="method" value="pp.regress_out"/> | 115 <param name="method" value="pp.regress_out"/> |
116 <param name="keys" value="cell_type"/> | |
117 </conditional> | |
118 <assert_stdout> | |
119 <has_text_matching expression="sc.pp.regress_out"/> | |
120 <has_text_matching expression="keys=\['cell_type'\]"/> | |
121 </assert_stdout> | |
122 <output name="anndata_out" file="pp.regress_out.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
123 </test> | |
124 <!--<test> | |
125 < test 2 > | |
126 <param name="adata" value="krumsiek11.h5ad" /> | |
127 <conditional name="method"> | |
128 <param name="method" value="pp.mnn_correct"/> | |
107 <param name="reg_keys" value="cell_type"/> | 129 <param name="reg_keys" value="cell_type"/> |
108 </conditional> | 130 </conditional> |
109 <param name="anndata_output_format" value="h5ad" /> | |
110 <assert_stdout> | 131 <assert_stdout> |
111 <has_text_matching expression="sc.pp.regress_out"/> | 132 <has_text_matching expression="sc.pp.mnn_correct"/> |
112 <has_text_matching expression="keys='cell_type'"/> | 133 <has_text_matching expression="keys='cell_type'"/> |
113 </assert_stdout> | 134 </assert_stdout> |
114 <output name="anndata_out_h5ad" file="pp.regress_out.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | 135 <output name="anndata_out" file="pp.mnn_correct.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> |
115 </test> | 136 </test>--> |
116 <test> | 137 <test> |
117 <conditional name="input"> | 138 <!-- test 2 --> |
118 <param name="format" value="h5ad" /> | 139 <param name="adata" value="blobs.h5ad" /> |
119 <param name="adata" value="krumsiek11.h5ad" /> | 140 <conditional name="method"> |
141 <param name="method" value="pp.combat"/> | |
142 <param name="key" value="blobs"/> | |
120 </conditional> | 143 </conditional> |
121 <conditional name="method"> | |
122 <param name="method" value="tl.score_genes"/> | |
123 <param name="gene_list" value="Gata2, Fog1"/> | |
124 <param name="ctrl_size" value="2"/> | |
125 <param name="n_bins" value="2"/> | |
126 <param name="random_state" value="2"/> | |
127 <param name="use_raw" value="False"/> | |
128 <param name="score_name" value="score"/> | |
129 </conditional> | |
130 <param name="anndata_output_format" value="h5ad"/> | |
131 <assert_stdout> | 144 <assert_stdout> |
132 <has_text_matching expression="sc.tl.score_genes" /> | 145 <has_text_matching expression="sc.pp.combat"/> |
133 <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" /> | 146 <has_text_matching expression="key='blobs'"/> |
134 <has_text_matching expression="ctrl_size=2" /> | |
135 <has_text_matching expression="score_name='score'" /> | |
136 <has_text_matching expression="n_bins=2" /> | |
137 <has_text_matching expression="random_state=2" /> | |
138 <has_text_matching expression="use_raw=False" /> | |
139 <has_text_matching expression="copy=False" /> | |
140 </assert_stdout> | 147 </assert_stdout> |
141 <output name="anndata_out_h5ad" file="tl.score_genes.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | 148 <output name="anndata_out" file="pp.combat.blobs.h5ad" ftype="h5ad" compare="sim_size"/> |
142 <output name="obs" file="tl.score_genes.krumsiek11.obs.tabular" ftype="tabular" compare="sim_size"/> | |
143 </test> | |
144 <test> | |
145 <conditional name="input"> | |
146 <param name="format" value="h5ad" /> | |
147 <param name="adata" value="krumsiek11.h5ad" /> | |
148 </conditional> | |
149 <conditional name="method"> | |
150 <param name="method" value="tl.score_genes_cell_cycle"/> | |
151 <param name="s_genes" value="Gata2, Fog1, EgrNab"/> | |
152 <param name="g2m_genes" value="Gata2, Fog1, EgrNab"/> | |
153 <param name="n_bins" value="2"/> | |
154 <param name="random_state" value="1"/> | |
155 <param name="use_raw" value="False"/> | |
156 </conditional> | |
157 <param name="anndata_output_format" value="h5ad"/> | |
158 <assert_stdout> | |
159 <has_text_matching expression="sc.tl.score_genes_cell_cycle"/> | |
160 <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/> | |
161 <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/> | |
162 <has_text_matching expression="n_bins=2"/> | |
163 <has_text_matching expression="random_state=1"/> | |
164 <has_text_matching expression="use_raw=False"/> | |
165 </assert_stdout> | |
166 <output name="anndata_out_h5ad" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5" compare="sim_size"/> | |
167 <output name="obs" file="tl.score_genes_cell_cycle.krumsiek11.obs.tabular" ftype="tabular" compare="sim_size"/> | |
168 </test> | 149 </test> |
169 </tests> | 150 </tests> |
170 <help><![CDATA[ | 151 <help><![CDATA[ |
171 Regress out unwanted sources of variation, using `pp.regress_out` | 152 Regress out unwanted sources of variation, using `pp.regress_out` |
172 ================================================================= | 153 ================================================================= |
173 | 154 |
174 Regress out unwanted sources of variation, using simple linear regression. This is | 155 Regress out unwanted sources of variation, using simple linear regression. This is |
175 inspired by Seurat's `regressOut` function in R. | 156 inspired by Seurat's `regressOut` function in R. |
176 | 157 |
177 More details on the `scanpy documentation | 158 More details on the `scanpy documentation |
178 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.regress_out.html#scanpy.api.pp.regress_out>`__ | 159 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.regress_out.html>`__ |
179 | 160 |
180 Score a set of genes, using `tl.score_genes` | 161 Correct batch effects by matching mutual nearest neighbors, using `pp.mnn_correct` |
181 ============================================ | 162 ================================================================================== |
182 | 163 |
183 The score is the average expression of a set of genes subtracted with the | 164 This uses the implementation of mnnpy. Depending on do_concatenate, it returns AnnData objects in the |
184 average expression of a reference set of genes. The reference set is | 165 original order containing corrected expression values or a concatenated matrix or AnnData object. |
185 randomly sampled from the `gene_pool` for each binned expression value. | |
186 | 166 |
187 This reproduces the approach in Seurat (Satija et al, 2015) and has been implemented | 167 Be reminded that it is not advised to use the corrected data matrices for differential expression testing. |
188 for Scanpy by Davide Cittaro. | |
189 | 168 |
190 More details on the `scanpy documentation | 169 More details on the `scanpy documentation |
191 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.score_genes.html#scanpy.api.tl.score_genes>`__ | 170 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.api.pp.mnn_correct.html>`__ |
192 | 171 |
193 Score cell cycle genes, using `tl.score_genes_cell_cycle` | |
194 ========================================================= | |
195 | 172 |
196 Given two lists of genes associated to S phase and G2M phase, calculates | 173 Correct batch effects with ComBat function (`pp.combat`) |
197 scores and assigns a cell cycle phase (G1, S or G2M). See | 174 ======================================================== |
198 `score_genes` for more explanation. | 175 |
176 Corrects for batch effects by fitting linear models; gains statistical power via an empirical Bayes (EB) framework where information is borrowed across genes. This uses the implementation of ComBat. | |
199 | 177 |
200 More details on the `scanpy documentation | 178 More details on the `scanpy documentation |
201 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.score_genes_cell_cycle.html#scanpy.api.tl.score_genes_cell_cycle>`__ | 179 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.combat.html>`__ |
180 | |
181 | |
202 ]]></help> | 182 ]]></help> |
203 <expand macro="citations"/> | 183 <expand macro="citations"/> |
204 </tool> | 184 </tool> |