comparison celesta.xml @ 0:8001319743c0 draft

planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/celesta commit 0ec46718dfd00f37ccae4e2fa133fa8393fe6d92
author goeckslab
date Wed, 28 Aug 2024 12:46:48 +0000
parents
children 44d4c885d9b5
comparison
equal deleted inserted replaced
-1:000000000000 0:8001319743c0
1 <tool id="celesta" name="CELESTA cell typing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<!-- One-line description of the tool. -->
2 <description>Cell type identification with spatial information</description>
<!-- Imports macros.xml. The macros expanded in this file (celesta_requirements, macro_stdio,
     celesta_base_options, citations) and the @...@ tokens used in the tool tag and in
     version_command are not defined in this file; presumably they come from macros.xml (confirm). -->
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="celesta_requirements"/>
7 <expand macro="macro_stdio" />
<!-- The version is reported by echoing the @VERSION@ token. -->
8 <version_command>echo "@VERSION@"</version_command>
<!--
  Command template (Cheetah, wrapped in CDATA). Exactly one branch runs, chosen by
  runmode.selected_mode:

    plot_expression: runs celesta_plot_expression.R with the anndata input, the signature
                     matrix, the X/Y coordinate column names and the figure size options.
    assign_cells:    runs celesta_assign_cells.R with the same inputs plus the EM options
                     (max_iteration, cell_change_threshold) and, when supplied, the optional
                     low/high expression threshold files. Afterwards one celesta_plot_cells.R
                     call is chained with a shell AND for every entry of the plot_cells repeat.

  In both branches the filter flag and the low/high filter thresholds are only emitted when
  filter_cells.filter equals 'filter'.

  NOTE(review): x_coord, y_coord, filter_cells, low_threshold and high_threshold are not
  declared in this file; presumably they come from the celesta_base_options macro (confirm).
  NOTE(review): parameters nested under the runmode conditional are referenced without the
  runmode prefix (for example $x_coord, $max_iteration); confirm this unprefixed access is intended.
  NOTE(review): save_rds is not passed on the command line; confirm how celesta_assign_cells.R
  decides whether to write celestaobj.rds.
-->
9 <command detect_errors="aggressive">
10 <![CDATA[
11 #if str($runmode.selected_mode) == 'plot_expression':
12 Rscript '$__tool_directory__/celesta_plot_expression.R'
13 --imagingdata '$anndata'
14 --prior '$prior_info'
15 --xcol '$x_coord'
16 --ycol '$y_coord'
17 --size '$test_size'
18 --height '$height'
19 --width '$width'
20 #if str($filter_cells.filter) == 'filter':
21 --filter
22 --lowfilter '$low_threshold'
23 --highfilter '$high_threshold'
24 #end if
25 #else if str($runmode.selected_mode) == 'assign_cells':
26 Rscript '$__tool_directory__/celesta_assign_cells.R'
27 --imagingdata '$anndata'
28 --prior '$prior_info'
29 --xcol '$x_coord'
30 --ycol '$y_coord'
31 --maxiteration '$max_iteration'
32 --changethresh '$cell_change_threshold'
33 #if str($filter_cells.filter) == 'filter':
34 --filter
35 --lowfilter '$low_threshold'
36 --highfilter '$high_threshold'
37 #end if
38 #if $low_thresholds_file:
39 --lowexpthresh '$low_thresholds_file'
40 #end if
41 #if $high_thresholds_file:
42 --highexpthresh '$high_thresholds_file'
43 #end if
44 #for $p in $plot_cells:
45 && Rscript '$__tool_directory__/celesta_plot_cells.R'
46 --prior '$prior_info'
47 --celltypes '${p.cell_types}'
48 --size '$p.test_size'
49 --height '$p.height'
50 --width '$p.width'
51 --dpi '$p.dpi'
52 #end for
53 #end if
54 ]]>
55 </command>
<!-- Declares an inputs config file named "inputs".
     NOTE(review): the command template in this file never references $inputs; confirm whether
     the R scripts actually consume it or whether this declaration can be dropped. -->
56 <configfiles>
57 <inputs name="inputs" />
58 </configfiles>
<!-- Tool inputs. An h5ad anndata file and a CSV cell-type signature matrix are always
     required; every other parameter depends on the selected run mode. -->
59 <inputs>
60 <param name="anndata" type="data" format="h5ad" label="Input anndata" />
61 <param name="prior_info" type="data" format="csv" label="Cell-type signature matrix" />
<!-- Run-mode switch. Both branches expand the celesta_base_options macro, which is not
     defined in this file. -->
62 <conditional name="runmode">
63 <param name="selected_mode" type="select" label="Select which CELESTA mode to run">
64 <option value="plot_expression" selected="true">Plot expression probabilities for markers in the cell type signature matrix</option>
65 <option value="assign_cells">Run the cell type assignment</option>
66 </param>
<!-- plot_expression: base options plus point size and figure dimensions. -->
67 <when value="plot_expression">
68 <expand macro="celesta_base_options" />
69 <section name="figure_options" title="Figure Options" expanded="true">
70 <param argument="test_size" type="float" value="1" min="0.1" max="10" label="Specify the point size for plotting cells" />
71 <param argument="height" type="integer" value="4" min="4" max="20" label="Specify the height of the figure (inches)" />
72 <param argument="width" type="integer" value="5" min="4" max="20" label="Specify the width of the figure (inches)" />
73 </section>
74 </when>
<!-- assign_cells: base options, EM settings, optional threshold files, optional RDS export,
     and a repeat describing zero or more cell-type plots. -->
75 <when value="assign_cells">
76 <expand macro="celesta_base_options" />
77 <section name="options" title="Advanced Options" expanded="false">
78 <param argument="max_iteration" type="integer" value="10" label="Define the maximum iterations allowed in the EM algorithm per round" />
<!-- Help text corrected: the stopping condition is met when fewer than the given fraction of
     cells change identity (the previous wording said "do not change"). -->
79 <param argument="cell_change_threshold" type="float" value="0.01" label="Define an ending condition for the EM algorithm" help="0.01 means that the algorithm will stop once fewer than 1% of the total number of cells change identity in an iteration" />
80 <param name="low_thresholds_file" type="data" format="csv" optional="true" label="Provide a file mapping low anchor and index cell assignment thresholds to cell types" />
81 <param name="high_thresholds_file" type="data" format="csv" optional="true" label="Provide a file mapping high anchor and index cell assignment thresholds to cell types" />
82 <param name="save_rds" type="boolean" checked="false" label="Also save CELESTA object as RDS file" help="Saving CELESTA object as RDS can allow for easier downstream analysis in R" />
83 </section>
<!-- Each repeat entry yields one celesta_plot_cells.R call in the command template. -->
84 <repeat name="plot_cells" title="Plot combinations of resulting cell type assignments" min="0">
85 <param name="cell_types" type="text" label="Provide a comma-separated list of cell type names to plot together">
86 <sanitizer>
87 <valid initial="string.printable"/>
88 </sanitizer>
89 </param>
90 <param argument="test_size" type="float" value="1" min="0.1" max="10" label="Specify the point size for plotting cells" />
91 <param argument="height" type="integer" value="12" min="4" max="20" label="Specify the height of the figure (inches)" />
92 <param argument="width" type="integer" value="12" min="4" max="20" label="Specify the width of the figure (inches)" />
93 <param argument="dpi" type="integer" value="300" min="50" max="500" label="Specify the DPI of the figure" />
94 </repeat>
95 </when>
96 </conditional>
97 </inputs>
<!-- Tool outputs. Every output is gated by a filter on the selected run mode. -->
98 <outputs>
<!-- plot_expression only: PNG files discovered in the marker_exp_plots directory. -->
99 <collection name="marker_expression_plots" type="list" label="Marker expression probability plots">
100 <discover_datasets pattern="__name_and_ext__" directory="marker_exp_plots" ext="png" />
101 <filter>runmode['selected_mode'] == "plot_expression"</filter>
102 </collection>
<!-- assign_cells only: result.h5ad taken from the working directory. -->
103 <data name="assign_cells_output" format="h5ad" label="CELESTA assign cells output" from_work_dir="result.h5ad" >
104 <filter>runmode['selected_mode'] == "assign_cells"</filter>
105 </data>
<!-- assign_cells with save_rds enabled: celestaobj.rds taken from the working directory. -->
106 <data name="assign_cells_rds" format="rds" label="CELESTA object RDS" from_work_dir="celestaobj.rds" >
107 <filter>runmode['selected_mode'] == "assign_cells" and runmode['options']['save_rds']</filter>
108 </data>
<!-- assign_cells with at least one plot_cells repeat entry: PNG files discovered in the
     cell_assign_plots directory. -->
109 <collection name="cell_assign_plots" type="list" label="Cell assignment plots">
110 <discover_datasets pattern="__name_and_ext__" directory="cell_assign_plots" ext="png" />
111 <filter>runmode['selected_mode'] == "assign_cells" and len(runmode['plot_cells']) != 0</filter>
112 </collection>
113 </outputs>
<!-- Functional tests. All three use celesta_image.h5ad and celesta_prior.csv as inputs. -->
114 <tests>
<!-- Test 1: plot_expression mode with default options. Expects a collection of 18 plots and
     compares one of them against a reference PNG by size. -->
115 <test expect_num_outputs="1">
116 <param name="anndata" value="celesta_image.h5ad" />
117 <param name="prior_info" value="celesta_prior.csv" />
118 <conditional name="runmode">
119 <param name="selected_mode" value="plot_expression" />
120 </conditional>
121 <output_collection name="marker_expression_plots" type="list" count="18">
122 <element name="CD31_VASCULATURE_CYC_19_CH_3_exp_prob" file="CD31_VASCULATURE_CYC_19_CH_3_exp_prob.png" compare="sim_size" />
123 </output_collection>
124 </test>
<!-- Test 2: assign_cells mode with default options. Checks that the h5ad output contains
     obs/celesta_final_cell_type and that stdout contains "vasculature 273". -->
125 <test expect_num_outputs="1">
126 <param name="anndata" value="celesta_image.h5ad" />
127 <param name="prior_info" value="celesta_prior.csv" />
128 <conditional name="runmode">
129 <param name="selected_mode" value="assign_cells" />
130 </conditional>
131 <output name="assign_cells_output">
132 <assert_contents>
133 <has_h5_keys keys="obs/celesta_final_cell_type" />
134 </assert_contents>
135 </output>
136 <assert_stdout>
137 <has_text text="vasculature 273" />
138 </assert_stdout>
139 </test>
<!-- Test 3: assign_cells mode with cell filtering, a high expression thresholds file, one
     plot_cells entry and RDS export. Expects three outputs (h5ad, plot collection, RDS) and
     stdout containing "vasculature 168".
     NOTE(review): filter, high_thresholds_file, plot_cells and save_rds are set outside the
     runmode conditional and options section that declare them; confirm that relying on
     unqualified parameter names here is intended. -->
140 <test expect_num_outputs="3">
141 <param name="anndata" value="celesta_image.h5ad" />
142 <param name="prior_info" value="celesta_prior.csv" />
143 <param name="filter" value="filter" />
144 <conditional name="runmode">
145 <param name="selected_mode" value="assign_cells" />
146 </conditional>
147 <param name="high_thresholds_file" value="celesta_high_exp_thresholds.csv" />
148 <repeat name="plot_cells">
149 <param name="cell_types" value="vasculature" />
150 </repeat>
151 <param name="save_rds" value="true" />
152 <output name="assign_cells_output">
153 <assert_contents>
154 <has_h5_keys keys="obs/celesta_final_cell_type" />
155 </assert_contents>
156 </output>
157 <output_collection name="cell_assign_plots" type="list" count="1">
158 <element name="plot_cells_vasculature" file="plot_cells_vasculature.png" compare="sim_size" />
159 </output_collection>
160 <output name="assign_cells_rds">
161 <assert_contents>
162 <has_size value="1400000" delta="100000" />
163 </assert_contents>
164 </output>
165 <assert_stdout>
166 <has_text text="vasculature 168" />
167 </assert_stdout>
168 </test>
169 </tests>
170 <help>
171 <![CDATA[
172 **What it does**
173
174 CELESTA (CELl typE identification with SpaTiAl information) is an algorithm aiming to perform
175 automated cell type identification for multiplexed in situ imaging data.
176
177 CELESTA makes use of both protein expressions and cell spatial neighborhood information
178 from segmented imaging data for the cell type identification.
179
180 This Galaxy implementation of CELESTA has two run modes:
181
182 **Both run modes share the following inputs**
183
184 `Input Anndata` -- anndata h5ad file where cells are rows, with marker expression in adata.X and cell coordinates in adata.obs
185
186 `Cell-type signature matrix` -- Comma-separated text file containing the following information and formatting:
187
188 (1) The first column has to contain the cell types to be inferred
189
190 (2) The second column has the lineage information for each cell type. The lineage information has three numbers
191 connected by “_” (underscore). The first number indicates round. Cell types with the same lineage level are
192 inferred at the same round. Increasing number indicates increasing cell-type resolution. For example,
193 immune cells -> CD3+ T cells -> CD4+ T cells. The third number is a number assigned to the cell type,
194 i.e., the cell type number. The middle number tells the previous lineage cell type number for the current cell type.
195 For example, the middle number for CD3+ T cells is 5, because it is a subtype of immune cells which have cell
196 type number assigned to 5.
197
198 (3) Starting from column three, each column is a protein marker. If the protein marker is known to be expressed
199 for that cell type, then it is denoted by “1”. If the protein marker is known to not express for a cell type,
200 then it is denoted by “0”. If the protein marker is irrelevant or uncertain to express for a cell type,
201 then it is denoted by “NA”.
202
203 `Name of anndata.obs key containing cell or nucleus centroid X position` -- if using output from MCMICRO, this would be 'X_centroid'
204
205 `Name of anndata.obs key containing cell or nucleus centroid Y position` -- if using output from MCMICRO, this would be 'Y_centroid'
206
207 `Choose whether to filter cells` -- Boolean whether to filter out cells with extreme low or high marker intensity that fall outside of thresholds (`CELESTA::FilterCells()`)
208
209 `Set the low threshold for filtering cells` -- low_marker_threshold param in `CELESTA::FilterCells()`
210
211 `Set the high threshold for filtering cells` -- high_marker_threshold param in `CELESTA::FilterCells()`
212
213 **Run modes**
214
215 1. Plot expression probabilities for markers in the cell type signature matrix
216
217 This run mode generates marker expression probability plots for every marker in the cell-type signature matrix.
218
219 **Additional inputs**
220
221 `Specify the point size for plotting cells` -- passed to `ggplot2::geom_point()` size param
222
223 `Specify the height of the figure (inches)` -- passed to `ggplot2::ggsave()` height param
224
225 `Specify the width of the figure (inches)` -- passed to `ggplot2::ggsave()` width param
226
227 **Outputs**
228
229 Collection of `.png` figures showing marker intensity probabilities as spatial scatter plots
230
231 2. Run the cell type assignment
232
233 **Additional inputs**
234
235 `Define the maximum iterations allowed in the EM algorithm per round` -- passed to `CELESTA::AssignCells()` max_iteration param
236
237 `Define an ending condition for the EM algorithm` -- passed to `CELESTA::AssignCells()` cell_change_threshold param
238
239 `Provide a file mapping low/high anchor and index cell assignment thresholds to cell types` -- comma separated text file containing following information and formatting:
240
241 (1) First column contains cell types to be inferred (same order as the cell type signature matrix)
242 Second column is named `anchor` and contains high or low thresholds for anchor cells
243 Third column is named `index` and contains high or low thresholds for index cells
244
245 (2) The `CELESTA::AssignCells()` function requires four vectors to define the high and low thresholds for each cell type. The length of each vector equals the
246 total number of cell types defined in the cell-type signature matrix. We suggest starting with the default thresholds and modifying them by comparing the results
247 with the original staining. Two vectors are required for defining the “high_expression_threshold”, one for anchor cells and one for index cells (non-anchor cells).
248 The thresholds define how much the marker expression probability is in order to be considered as expressed.
249
250 (3) For the low thresholds, normally 1 is assigned to this value unless there are a lot of doublets or co-staining in the data. The low expression threshold default
251 values in general are robust, and thus we recommend testing the high expression threshold values.
252
253 `Also save CELESTA object as RDS file` -- Boolean whether to output an RDS file in addition to the default h5ad output
254
255 `Plot combinations of resulting cell type assignments` -- specify any combination of cell types from the cell type signature matrix to plot. This is a repeat element, and one plot will be generated per repetition. There are additional params to control plot aesthetic attributes
256
257 **Outputs**
258
259 `CELESTA assign cells output` -- The primary output, an h5ad file, with new columns containing cell type information. New columns are prepended with `celesta_`
260
261 `CELESTA object RDS` -- optionally output CELESTA object as RDS for downstream analysis in R
262
263 Optional collection of `.png` figures of spatial scatter plots color annotated by cell type assignment
264
265 Visit github.com/plevritis-lab/CELESTA for full documentation
266
267 ]]>
268 </help>
269 <expand macro="citations" />
270 </tool>