Mercurial > repos > goeckslab > celesta
comparison celesta.xml @ 0:8001319743c0 draft
planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/celesta commit 0ec46718dfd00f37ccae4e2fa133fa8393fe6d92
author | goeckslab |
---|---|
date | Wed, 28 Aug 2024 12:46:48 +0000 |
parents | |
children | 44d4c885d9b5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8001319743c0 |
---|---|
1 <tool id="celesta" name="CELESTA cell typing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>Cell type identification with spatial information</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="celesta_requirements"/> | |
7 <expand macro="macro_stdio" /> | |
8 <version_command>echo "@VERSION@"</version_command> | |
<!--
  Command template (Cheetah). The branch is chosen by runmode.selected_mode:
    * plot_expression: runs celesta_plot_expression.R with the anndata file, the
      signature matrix, the coordinate column names and the figure size options.
    * assign_cells: runs celesta_assign_cells.R with the EM options; the low/high
      threshold files are passed only when a dataset was supplied. Afterwards one
      celesta_plot_cells.R call is chained with && for every plot_cells repeat.
  In both branches the filter flag and its two thresholds are added only when
  filter_cells.filter equals 'filter'.
  NOTE(review): x_coord, y_coord, filter_cells, low_threshold and high_threshold
  are not declared in this file; presumably they come from the
  celesta_base_options macro. Confirm against macros.xml.
  NOTE(review): save_rds is not passed on the command line here; confirm how the
  R script decides whether to write celestaobj.rds.
-->
9 <command detect_errors="aggressive"> | |
10 <![CDATA[ | |
11 #if str($runmode.selected_mode) == 'plot_expression': | |
12 Rscript '$__tool_directory__/celesta_plot_expression.R' | |
13 --imagingdata '$anndata' | |
14 --prior '$prior_info' | |
15 --xcol '$x_coord' | |
16 --ycol '$y_coord' | |
17 --size '$test_size' | |
18 --height '$height' | |
19 --width '$width' | |
20 #if str($filter_cells.filter) == 'filter': | |
21 --filter | |
22 --lowfilter '$low_threshold' | |
23 --highfilter '$high_threshold' | |
24 #end if | |
25 #else if str($runmode.selected_mode) == 'assign_cells': | |
26 Rscript '$__tool_directory__/celesta_assign_cells.R' | |
27 --imagingdata '$anndata' | |
28 --prior '$prior_info' | |
29 --xcol '$x_coord' | |
30 --ycol '$y_coord' | |
31 --maxiteration '$max_iteration' | |
32 --changethresh '$cell_change_threshold' | |
33 #if str($filter_cells.filter) == 'filter': | |
34 --filter | |
35 --lowfilter '$low_threshold' | |
36 --highfilter '$high_threshold' | |
37 #end if | |
38 #if $low_thresholds_file: | |
39 --lowexpthresh '$low_thresholds_file' | |
40 #end if | |
41 #if $high_thresholds_file: | |
42 --highexpthresh '$high_thresholds_file' | |
43 #end if | |
44 #for $p in $plot_cells: | |
45 && Rscript '$__tool_directory__/celesta_plot_cells.R' | |
46 --prior '$prior_info' | |
47 --celltypes '${p.cell_types}' | |
48 --size '$p.test_size' | |
49 --height '$p.height' | |
50 --width '$p.width' | |
51 --dpi '$p.dpi' | |
52 #end for | |
53 #end if | |
54 ]]> | |
55 </command> | |
<!--
  Declares an inputs config file named "inputs".
  NOTE(review): $inputs is not referenced in the command template of this file;
  confirm whether the R scripts still rely on it or whether it can be dropped.
-->
56 <configfiles> | |
57 <inputs name="inputs" /> | |
58 </configfiles> | |
<!--
  Tool inputs. The anndata file and the signature matrix are shared; everything
  else depends on the selected run mode. Both modes expand celesta_base_options
  (defined in macros.xml, not shown here).
-->
59 <inputs> | |
60 <param name="anndata" type="data" format="h5ad" label="Input anndata" /> | |
61 <param name="prior_info" type="data" format="csv" label="Cell-type signature matrix" /> | |
62 <conditional name="runmode"> | |
63 <param name="selected_mode" type="select" label="Select which CELESTA mode to run"> | |
64 <option value="plot_expression" selected="true">Plot expression probabilities for markers in the cell type signature matrix</option> | |
65 <option value="assign_cells">Run the cell type assignment</option> | |
66 </param> | |
67 <when value="plot_expression"> | |
68 <expand macro="celesta_base_options" /> | |
69 <section name="figure_options" title="Figure Options" expanded="true"> | |
70 <param argument="test_size" type="float" value="1" min="0.1" max="10" label="Specify the point size for plotting cells" /> | |
71 <param argument="height" type="integer" value="4" min="4" max="20" label="Specify the height of the figure (inches)" /> | |
72 <param argument="width" type="integer" value="5" min="4" max="20" label="Specify the width of the figure (inches)" /> | |
73 </section> | |
74 </when> | |
75 <when value="assign_cells"> | |
76 <expand macro="celesta_base_options" /> | |
77 <section name="options" title="Advanced Options" expanded="false"> | |
<!-- At least one EM iteration is required; the threshold is a fraction of all cells, hence bounded to [0, 1]. -->
78 <param argument="max_iteration" type="integer" value="10" min="1" label="Define the maximum iterations allowed in the EM algorithm per round" /> | |
79 <param argument="cell_change_threshold" type="float" value="0.01" min="0" max="1" label="Define an ending condition for the EM algorithm" help="0.01 means that the algorithm will stop when fewer than 1% of the total number of cells change identity" /> | |
80 <param name="low_thresholds_file" type="data" format="csv" optional="true" label="Provide a file mapping low anchor and index cell assignment thresholds to cell types" /> | |
81 <param name="high_thresholds_file" type="data" format="csv" optional="true" label="Provide a file mapping high anchor and index cell assignment thresholds to cell types" /> | |
82 <param name="save_rds" type="boolean" checked="false" label="Also save CELESTA object as RDS file" help="Saving CELESTA object as RDS can allow for easier downstream analysis in R" /> | |
83 </section> | |
84 <repeat name="plot_cells" title="Plot combinations of resulting cell type assignments" min="0"> | |
85 <param name="cell_types" type="text" label="Provide a comma-separated list of cell type names to plot together"> | |
<!--
  cell_types is interpolated inside single quotes in the command template, so
  the single quote is removed from the allowed characters; otherwise user input
  could break out of the shell quoting.
-->
86 <sanitizer> | |
87 <valid initial="string.printable"><remove value="&apos;"/></valid> | |
88 </sanitizer> | |
89 </param> | |
90 <param argument="test_size" type="float" value="1" min="0.1" max="10" label="Specify the point size for plotting cells" /> | |
91 <param argument="height" type="integer" value="12" min="4" max="20" label="Specify the height of the figure (inches)" /> | |
92 <param argument="width" type="integer" value="12" min="4" max="20" label="Specify the width of the figure (inches)" /> | |
93 <param argument="dpi" type="integer" value="300" min="50" max="500" label="Specify the DPI of the figure" /> | |
94 </repeat> | |
95 </when> | |
96 </conditional> | |
97 </inputs> | |
<!--
  Tool outputs. Every output is gated by a filter on the selected run mode, so
  only the outputs belonging to the chosen mode are created.
-->
98 <outputs> | |
<!-- plot_expression mode: PNG files discovered in the marker_exp_plots directory. -->
99 <collection type="list" name="marker_expression_plots" label="Marker expression probability plots"> | |
100 <discover_datasets directory="marker_exp_plots" pattern="__name_and_ext__" ext="png"/> | |
101 <filter>runmode['selected_mode'] == "plot_expression"</filter> | |
102 </collection> | |
<!-- assign_cells mode: the annotated h5ad picked up from result.h5ad in the working directory. -->
103 <data format="h5ad" name="assign_cells_output" from_work_dir="result.h5ad" label="CELESTA assign cells output"> | |
104 <filter>runmode['selected_mode'] == "assign_cells"</filter> | |
105 </data> | |
<!-- assign_cells mode, only when save_rds is checked: celestaobj.rds from the working directory. -->
106 <data format="rds" name="assign_cells_rds" from_work_dir="celestaobj.rds" label="CELESTA object RDS"> | |
107 <filter>runmode['selected_mode'] == "assign_cells" and runmode['options']['save_rds']</filter> | |
108 </data> | |
<!-- assign_cells mode, only when at least one plot_cells repeat exists: PNG files from cell_assign_plots. -->
109 <collection type="list" name="cell_assign_plots" label="Cell assignment plots"> | |
110 <discover_datasets directory="cell_assign_plots" pattern="__name_and_ext__" ext="png"/> | |
111 <filter>runmode['selected_mode'] == "assign_cells" and len(runmode['plot_cells']) != 0</filter> | |
112 </collection> | |
113 </outputs> | |
114 <tests> | |
<!--
  Test 1: plot_expression mode with default options. Expects only the
  marker_expression_plots collection, holding 18 elements; one PNG is compared
  by size.
-->
115 <test expect_num_outputs="1"> | |
116 <param name="anndata" value="celesta_image.h5ad" /> | |
117 <param name="prior_info" value="celesta_prior.csv" /> | |
118 <conditional name="runmode"> | |
119 <param name="selected_mode" value="plot_expression" /> | |
120 </conditional> | |
121 <output_collection name="marker_expression_plots" type="list" count="18"> | |
122 <element name="CD31_VASCULATURE_CYC_19_CH_3_exp_prob" file="CD31_VASCULATURE_CYC_19_CH_3_exp_prob.png" compare="sim_size" /> | |
123 </output_collection> | |
124 </test> | |
<!--
  Test 2: assign_cells mode with default options. Expects only the h5ad output,
  checks that it holds obs/celesta_final_cell_type and that stdout reports
  "vasculature 273".
-->
125 <test expect_num_outputs="1"> | |
126 <param name="anndata" value="celesta_image.h5ad" /> | |
127 <param name="prior_info" value="celesta_prior.csv" /> | |
128 <conditional name="runmode"> | |
129 <param name="selected_mode" value="assign_cells" /> | |
130 </conditional> | |
131 <output name="assign_cells_output"> | |
132 <assert_contents> | |
133 <has_h5_keys keys="obs/celesta_final_cell_type" /> | |
134 </assert_contents> | |
135 </output> | |
136 <assert_stdout> | |
137 <has_text text="vasculature 273" /> | |
138 </assert_stdout> | |
139 </test> | |
<!--
  Test 3: assign_cells mode with cell filtering, a high threshold file, one
  plot_cells repeat and save_rds enabled. Expects three outputs: the h5ad file,
  the cell_assign_plots collection (one element) and the RDS file.
  NOTE(review): filter, high_thresholds_file, plot_cells and save_rds are given
  at the top level of the test instead of nested under the runmode conditional
  and its options section; presumably they are matched by name. Confirm.
-->
140 <test expect_num_outputs="3"> | |
141 <param name="anndata" value="celesta_image.h5ad" /> | |
142 <param name="prior_info" value="celesta_prior.csv" /> | |
143 <param name="filter" value="filter" /> | |
144 <conditional name="runmode"> | |
145 <param name="selected_mode" value="assign_cells" /> | |
146 </conditional> | |
147 <param name="high_thresholds_file" value="celesta_high_exp_thresholds.csv" /> | |
148 <repeat name="plot_cells"> | |
149 <param name="cell_types" value="vasculature" /> | |
150 </repeat> | |
151 <param name="save_rds" value="true" /> | |
152 <output name="assign_cells_output"> | |
153 <assert_contents> | |
154 <has_h5_keys keys="obs/celesta_final_cell_type" /> | |
155 </assert_contents> | |
156 </output> | |
157 <output_collection name="cell_assign_plots" type="list" count="1"> | |
158 <element name="plot_cells_vasculature" file="plot_cells_vasculature.png" compare="sim_size" /> | |
159 </output_collection> | |
160 <output name="assign_cells_rds"> | |
161 <assert_contents> | |
162 <has_size value="1400000" delta="100000" /> | |
163 </assert_contents> | |
164 </output> | |
165 <assert_stdout> | |
166 <has_text text="vasculature 168" /> | |
167 </assert_stdout> | |
168 </test> | |
169 </tests> | |
170 <help> | |
171 <![CDATA[ | |
172 **What it does** | |
173 | |
174 CELESTA (CELl typE identification with SpaTiAl information) is an algorithm aiming to perform | |
175 automated cell type identification for multiplexed in situ imaging data. | |
176 | |
177 CELESTA makes use of both protein expressions and cell spatial neighborhood information | |
178 from segmented imaging data for the cell type identification. | |
179 | |
180 This Galaxy implementation of CELESTA has two run modes: | |
181 | |
182 **Both run modes share the following inputs** | |
183 | |
184 `Input Anndata` -- anndata h5ad file where cells are rows, with marker expression in adata.X and cell coordinates in adata.obs | |
185 | |
186 `Cell-type signature matrix` -- Comma-separated text file containing the following information and formatting: | |
187 | |
188 (1) The first column has to contain the cell types to be inferred | |
189 | |
190 (2) The second column has the lineage information for each cell type. The lineage information has three numbers | |
191 connected by “_” (underscore). The first number indicates round. Cell types with the same lineage level are | |
192 inferred at the same round. Increasing number indicates increasing cell-type resolution. For example, | |
193 immune cells -> CD3+ T cells -> CD4+ T cells. The third number is a number assigned to the cell type, | |
194 i.e., cell type number. The middle number tells the previous lineage cell type number for the current cell type. | |
195 For example, the middle number for CD3+ T cells is 5, because it is a subtype of immune cells which have cell | |
196 type number assigned to 5. | |
197 | |
198 (3) Starting from column three, each column is a protein marker. If the protein marker is known to be expressed | |
199 for that cell type, then it is denoted by “1”. If the protein marker is known to not express for a cell type, | |
200 then it is denoted by “0”. If the protein marker is irrelevant or uncertain to express for a cell type, | |
201 then it is denoted by “NA”. | |
202 | |
203 `Name of anndata.obs key containing cell or nucleus centroid X position` -- if using output from MCMICRO, this would be 'X_centroid' | |
204 | |
205 `Name of anndata.obs key containing cell or nucleus centroid Y position` -- if using output from MCMICRO, this would be 'Y_centroid' | |
206 | |
207 `Choose whether to filter cells` -- Boolean whether to filter out cells with extremely low or high marker intensity that fall outside of thresholds (`CELESTA::FilterCells()`) | |
208 | |
209 `Set the low threshold for filtering cells` -- low_marker_threshold param in `CELESTA::FilterCells()` | |
210 | |
211 `Set the high threshold for filtering cells` -- high_marker_threshold param in `CELESTA::FilterCells()` | |
212 | |
213 **Run modes** | |
214 | |
215 1. Plot expression probabilities for markers in the cell type signature matrix | |
216 | |
217 This run mode generates marker expression probability plots for every marker in the cell-type signature matrix. | |
218 | |
219 **Additional inputs** | |
220 | |
221 `Specify the point size for plotting cells` -- passed to `ggplot2::geom_point()` size param | |
222 | |
223 `Specify the height of the figure (inches)` -- passed to `ggplot2::ggsave()` height param | |
224 | |
225 `Specify the width of the figure (inches)` -- passed to `ggplot2::ggsave()` width param | |
226 | |
227 **Outputs** | |
228 | |
229 Collection of `.png` figures showing marker intensity probabilities as spatial scatter plots | |
230 | |
231 2. Run the cell type assignment | |
232 | |
233 **Additional inputs** | |
234 | |
235 `Define the maximum iterations allowed in the EM algorithm per round` -- passed to `CELESTA::AssignCells()` max_iteration param | |
236 | |
237 `Define an ending condition for the EM algorithm` -- passed to `CELESTA::AssignCells()` cell_change_threshold param | |
238 | |
239 `Provide a file mapping low/high anchor and index cell assignment thresholds to cell types` -- comma separated text file containing following information and formatting: | |
240 | |
241 (1) First column contains cell types to be inferred (same order as the cell type signature matrix) | |
242 Second column is named `anchor` and contains high or low thresholds for anchor cells | |
243 Third column is named `index` and contains high or low thresholds for index cells | |
244 | |
245 (2) The `CELESTA::AssignCells()` function requires four vectors to define the high and low thresholds for each cell type. The length of each vector equals the | |
246 total number of cell types defined in the cell-type signature matrix. We suggest starting with the default thresholds and modifying them by comparing the results | |
247 with the original staining. Two vectors are required for defining the “high_expression_threshold”, one for anchor cells and one for index cells (non-anchor cells). | |
248 The thresholds define how high the marker expression probability must be for the marker to be considered expressed. | |
249 | |
250 (3) For the low thresholds, normally 1 is assigned to this value unless there are a lot of doublets or co-staining in the data. The low expression threshold default | |
251 values are in general robust, and thus we recommend testing the high expression threshold values. | |
252 | |
253 `Also save CELESTA object as RDS file` -- Boolean whether to output an RDS file in addition to the default h5ad output | |
254 | |
255 `Plot combinations of resulting cell type assignments` -- specify any combination of cell types from the cell type signature matrix to plot. This is a repeat element, and one plot will be generated per repetition. There are additional params to control plot aesthetic attributes | |
256 | |
257 **Outputs** | |
258 | |
259 `CELESTA assign cells output` -- The primary output, an h5ad file, with new columns containing cell type information. New columns are prepended with `celesta_` | |
260 | |
261 `CELESTA object RDS` -- optionally output CELESTA object as RDS for downstream analysis in R | |
262 | |
263 Optional collection of `.png` figures of spatial scatter plots color annotated by cell type assignment | |
264 | |
265 Visit github.com/plevritis-lab/CELESTA for full documentation | |
266 | |
267 ]]> | |
268 </help> | |
269 <expand macro="citations" /> | |
270 </tool> |