view celesta.xml @ 0:8001319743c0 draft

planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/celesta commit 0ec46718dfd00f37ccae4e2fa133fa8393fe6d92
author goeckslab
date Wed, 28 Aug 2024 12:46:48 +0000
parents
children 44d4c885d9b5
line wrap: on
line source

<tool id="celesta" name="CELESTA cell typing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Cell type identification with spatial information</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="celesta_requirements"/>
    <expand macro="macro_stdio" />
    <!-- Report the wrapped tool version using @TOOL_VERSION@, the same token that builds the tool's version attribute. -->
    <version_command>echo "@TOOL_VERSION@"</version_command>
    <command detect_errors="aggressive">
        <![CDATA[
        ## Dispatch on the selected run mode; each branch calls the matching R script
        ## located in the tool directory.
        ## NOTE(review): parameters nested inside conditionals/sections are referenced
        ## without their enclosing path - confirm the configured profile resolves them.
        #set $celesta_mode = str($runmode.selected_mode)
        #if $celesta_mode == 'plot_expression':
            ## Mode 1: plot marker expression probabilities.
            Rscript '${__tool_directory__}/celesta_plot_expression.R'
                --imagingdata '${anndata}'
                --prior '${prior_info}'
                --xcol '${x_coord}'
                --ycol '${y_coord}'
                --size '${test_size}'
                --height '${height}'
                --width '${width}'
            #if str($filter_cells.filter) == 'filter':
                --filter
                --lowfilter '${low_threshold}'
                --highfilter '${high_threshold}'
            #end if
        #else if $celesta_mode == 'assign_cells':
            ## Mode 2: run the cell type assignment.
            Rscript '${__tool_directory__}/celesta_assign_cells.R'
                --imagingdata '${anndata}'
                --prior '${prior_info}'
                --xcol '${x_coord}'
                --ycol '${y_coord}'
                --maxiteration '${max_iteration}'
                --changethresh '${cell_change_threshold}'
            #if str($filter_cells.filter) == 'filter':
                --filter
                --lowfilter '${low_threshold}'
                --highfilter '${high_threshold}'
            #end if
            ## The threshold files are optional; each flag is emitted only when a dataset is supplied.
            #if $low_thresholds_file:
                --lowexpthresh '${low_thresholds_file}'
            #end if
            #if $high_thresholds_file:
                --highexpthresh '${high_thresholds_file}'
            #end if
            ## One chained plotting call per entry of the plot_cells repeat.
            #for $plot in $plot_cells:
                && Rscript '${__tool_directory__}/celesta_plot_cells.R'
                    --prior '${prior_info}'
                    --celltypes '${plot.cell_types}'
                    --size '${plot.test_size}'
                    --height '${plot.height}'
                    --width '${plot.width}'
                    --dpi '${plot.dpi}'
            #end for
        #end if
        ]]>
    </command>
    <!-- Declares a config file named "inputs" holding the tool's parameter values.
         NOTE(review): $inputs is not referenced by the command template in this file; confirm whether it is still needed. -->
    <configfiles>
        <inputs name="inputs" />
    </configfiles>
    <inputs>
        <!-- Inputs shared by both run modes: the imaging data (h5ad) and the cell-type signature matrix (csv),
             passed to the R scripts as the imagingdata and prior options. -->
        <param name="anndata" type="data" format="h5ad" label="Input anndata" />
        <param name="prior_info" type="data" format="csv" label="Cell-type signature matrix" />
        <conditional name="runmode">
            <!-- Run-mode selector: its value picks the branch of the command template and drives the output filters. -->
            <param name="selected_mode" type="select" label="Select which CELESTA mode to run">
                <option value="plot_expression" selected="true">Plot expression probabilities for markers in the cell type signature matrix</option>
                <option value="assign_cells">Run the cell type assignment</option>
            </param>
            <!-- plot_expression mode: shared base options (expanded from a macro not visible in this file)
                 plus the figure settings passed to celesta_plot_expression.R as the size, height and width options. -->
            <when value="plot_expression">
                <expand macro="celesta_base_options" />
                <section name="figure_options" title="Figure Options" expanded="true">
                    <param argument="test_size" type="float" value="1" min="0.1" max="10" label="Specify the point size for plotting cells" />
                    <param argument="height" type="integer" value="4" min="4" max="20" label="Specify the height of the figure (inches)" />
                    <param argument="width" type="integer" value="5" min="4" max="20" label="Specify the width of the figure (inches)" />
                </section>
            </when>
            <when value="assign_cells">
                <expand macro="celesta_base_options" />
                <!-- Tuning parameters forwarded to celesta_assign_cells.R (maxiteration, changethresh, lowexpthresh,
                     highexpthresh), plus the save_rds toggle that is read by the assign_cells_rds output filter. -->
                <section name="options" title="Advanced Options" expanded="false">
                    <param argument="max_iteration" type="integer" value="10" label="Define the maximum iterations allowed in the EM algorithm per round" />
                    <!-- Help text corrected: the algorithm stops when few cells still CHANGE identity, not when few cells stay unchanged. -->
                    <param argument="cell_change_threshold" type="float" value="0.01" label="Define an ending condition for the EM algorithm" help="0.01 means that when fewer than 1% of the total number of cells change identity, the algorithm will stop" />
                    <param name="low_thresholds_file" type="data" format="csv" optional="true" label="Provide a file mapping low anchor and index cell assignment thresholds to cell types" />
                    <param name="high_thresholds_file" type="data" format="csv" optional="true" label="Provide a file mapping high anchor and index cell assignment thresholds to cell types" />
                    <param name="save_rds" type="boolean" checked="false" label="Also save CELESTA object as RDS file" help="Saving CELESTA object as RDS can allow for easier downstream analysis in R" />
                </section>
                <!-- Each repeat entry triggers one celesta_plot_cells.R call in the command template. -->
                <repeat name="plot_cells" title="Plot combinations of resulting cell type assignments" min="0">
                    <param name="cell_types" type="text" label="Provide a comma-separated list of cell type names to plot together">
                        <!-- The value is interpolated inside single quotes on the command line, so the single quote
                             itself must not be accepted as a valid character: it would end the shell quoting. -->
                        <sanitizer>
                            <valid initial="string.printable">
                                <remove value="&apos;" />
                            </valid>
                        </sanitizer>
                    </param>
                    <param argument="test_size" type="float" value="1" min="0.1" max="10" label="Specify the point size for plotting cells" />
                    <param argument="height" type="integer" value="12" min="4" max="20" label="Specify the height of the figure (inches)" />
                    <param argument="width" type="integer" value="12" min="4" max="20" label="Specify the width of the figure (inches)" />
                    <param argument="dpi" type="integer" value="300" min="50" max="500" label="Specify the DPI of the figure" />
                </repeat>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- plot_expression mode: PNG files discovered in marker_exp_plots/ -->
        <collection name="marker_expression_plots"
                    type="list"
                    label="Marker expression probability plots">
            <filter>runmode['selected_mode'] == "plot_expression"</filter>
            <discover_datasets pattern="__name_and_ext__" directory="marker_exp_plots" ext="png" />
        </collection>
        <!-- assign_cells mode: the h5ad result picked up from the working directory -->
        <data name="assign_cells_output"
              format="h5ad"
              label="CELESTA assign cells output"
              from_work_dir="result.h5ad">
            <filter>runmode['selected_mode'] == "assign_cells"</filter>
        </data>
        <!-- assign_cells mode: RDS object, kept only when save_rds is enabled -->
        <data name="assign_cells_rds"
              format="rds"
              label="CELESTA object RDS"
              from_work_dir="celestaobj.rds">
            <filter>runmode['selected_mode'] == "assign_cells" and runmode['options']['save_rds']</filter>
        </data>
        <!-- assign_cells mode: PNG files discovered in cell_assign_plots/, only when at least one plot was requested -->
        <collection name="cell_assign_plots"
                    type="list"
                    label="Cell assignment plots">
            <filter>runmode['selected_mode'] == "assign_cells" and len(runmode['plot_cells']) != 0</filter>
            <discover_datasets pattern="__name_and_ext__" directory="cell_assign_plots" ext="png" />
        </collection>
    </outputs>
    <tests>
        <!-- Test 1: plot_expression mode with default options; expects a collection of 18 plots and
             compares one of them against a reference image by size. -->
        <test expect_num_outputs="1">
            <param name="anndata" value="celesta_image.h5ad" />
            <param name="prior_info" value="celesta_prior.csv" />
            <conditional name="runmode">
                <param name="selected_mode" value="plot_expression" />
            </conditional>
            <output_collection name="marker_expression_plots" type="list" count="18">
                <element name="CD31_VASCULATURE_CYC_19_CH_3_exp_prob" file="CD31_VASCULATURE_CYC_19_CH_3_exp_prob.png" compare="sim_size" />
            </output_collection>
        </test>
        <!-- Test 2: assign_cells mode with default options; checks that the h5ad result carries the
             celesta_final_cell_type key and that stdout contains the expected vasculature count. -->
        <test expect_num_outputs="1">
            <param name="anndata" value="celesta_image.h5ad" />
            <param name="prior_info" value="celesta_prior.csv" />
            <conditional name="runmode">
                <param name="selected_mode" value="assign_cells" />
            </conditional>
            <output name="assign_cells_output">
                <assert_contents>
                    <has_h5_keys keys="obs/celesta_final_cell_type" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text text="vasculature 273" />
            </assert_stdout>
        </test>
        <!-- Test 3: assign_cells mode with cell filtering, a high-threshold file, one cell-type plot and RDS export;
             expects the h5ad result, the plot collection and the RDS file.
             NOTE(review): filter, high_thresholds_file, save_rds and plot_cells are given outside the
             conditional/section that declares them; confirm the test framework resolves them as intended. -->
        <test expect_num_outputs="3">
            <param name="anndata" value="celesta_image.h5ad" />
            <param name="prior_info" value="celesta_prior.csv" />
            <param name="filter" value="filter" />
            <conditional name="runmode">
                <param name="selected_mode" value="assign_cells" />
            </conditional>
            <param name="high_thresholds_file" value="celesta_high_exp_thresholds.csv" />
            <repeat name="plot_cells">
                <param name="cell_types" value="vasculature" />
            </repeat>
            <param name="save_rds" value="true" />
            <output name="assign_cells_output">
                <assert_contents>
                    <has_h5_keys keys="obs/celesta_final_cell_type" />
                </assert_contents>
            </output>
            <output_collection name="cell_assign_plots" type="list" count="1">
                <element name="plot_cells_vasculature" file="plot_cells_vasculature.png" compare="sim_size" />
            </output_collection>
            <output name="assign_cells_rds">
                <assert_contents>
                    <has_size value="1400000" delta="100000" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text text="vasculature 168" />
            </assert_stdout>
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**

CELESTA (CELl typE identification with SpaTiAl information) is an algorithm aiming to perform
automated cell type identification for multiplexed in situ imaging data. 

CELESTA makes use of both protein expressions and cell spatial neighborhood information 
from segmented imaging data for the cell type identification.

This Galaxy implementation of CELESTA has two run modes, described under **Run modes** below.

**Both run modes share the following inputs**

`Input Anndata` -- anndata h5ad file where cells are rows, with marker expression in adata.X and cell coordinates in adata.obs 

`Cell-type signature matrix` -- Comma-separated text file containing the following information and formatting: 

(1) The first column has to contain the cell types to be inferred

(2) The second column has the lineage information for each cell type. The lineage information has three numbers 
    connected by “_” (underscore). The first number indicates round. Cell types with the same lineage level are 
    inferred at the same round. Increasing number indicates increasing cell-type resolution. For example, 
    immune cells -> CD3+ T cells -> CD4+ T cells. The third number is a number assigned to the cell type,
    i.e., the cell type number. The middle number gives the cell type number of the previous lineage level for the current cell type.
    For example, the middle number for CD3+ T cells is 5, because it is a subtype of immune cells which have cell 
    type number assigned to 5.

(3) Starting from column three, each column is a protein marker. If the protein marker is known to be expressed 
    for that cell type, then it is denoted by “1”. If the protein marker is known to not express for a cell type, 
    then it is denoted by “0”. If the protein marker is irrelevant or uncertain to express for a cell type, 
    then it is denoted by “NA”.

`Name of anndata.obs key containing cell or nucleus centroid X position` -- if using output from MCMICRO, this would be 'X_centroid'

`Name of anndata.obs key containing cell or nucleus centroid Y position` -- if using output from MCMICRO, this would be 'Y_centroid'

`Choose whether to filter cells` -- Boolean whether to filter out cells with extreme low or high marker intensity that fall outside of thresholds (`CELESTA::FilterCells()`)

`Set the low threshold for filtering cells` -- low_marker_threshold param in `CELESTA::FilterCells()`

`Set the high threshold for filtering cells` -- high_marker_threshold param in `CELESTA::FilterCells()`

**Run modes**

1. Plot expression probabilities for markers in the cell type signature matrix

    This run mode generates marker expression probability plots for every marker in the cell-type signature matrix. 

    **Additional inputs**

    `Specify the point size for plotting cells` -- passed to `ggplot2::geom_point()` size param

    `Specify the height of the figure (inches)` -- passed to `ggplot2::ggsave()` height param 

    `Specify the width of the figure (inches)` -- passed to `ggplot2::ggsave()` width param 

    **Outputs**

    Collection of `.png` figures showing marker intensity probabilities as spatial scatter plots

2. Run the cell type assignment

    **Additional inputs**

    `Define the maximum iterations allowed in the EM algorithm per round` -- passed to `CELESTA::AssignCells()` max_iteration param

    `Define an ending condition for the EM algorithm` -- passed to `CELESTA::AssignCells()` cell_change_threshold param 

    `Provide a file mapping low/high anchor and index cell assignment thresholds to cell types` -- comma separated text file containing following information and formatting:

(1) The first column contains the cell types to be inferred (same order as the cell type signature matrix).
    The second column is named `anchor` and contains high or low thresholds for anchor cells.
    The third column is named `index` and contains high or low thresholds for index cells.

(2) The `CELESTA::AssignCells()` function requires four vectors to define the high and low thresholds for each cell type. The length of each vector equals the
    total number of cell types defined in the cell-type signature matrix. We suggest starting with the default thresholds and modifying them by comparing the results
    with the original staining. Two vectors are required for defining the “high_expression_threshold”, one for anchor cells and one for index cells (non-anchor cells).
    The thresholds define how high the marker expression probability must be in order to be considered as expressed.

(3) For the low thresholds, a value of 1 is normally assigned unless there are a lot of doublets or co-staining in the data. The default low expression threshold
    values are generally robust, and thus we recommend testing the high expression threshold values.

`Also save CELESTA object as RDS file` -- Boolean whether to output an RDS file in addition to the default h5ad output 

`Plot combinations of resulting cell type assignments` -- specify any combination of cell types from the cell type signature matrix to plot. This is a repeat element, and one plot will be generated per repetition. There are additional params to control plot aesthetic attributes

**Outputs**

`CELESTA assign cells output` -- The primary output, an h5ad file, with new columns containing cell type information. New columns are prepended with `celesta_`

`CELESTA object RDS` -- optionally output CELESTA object as RDS for downstream analysis in R 

Optional collection of `.png` figures of spatial scatter plots color annotated by cell type assignment 

Visit github.com/plevritis-lab/CELESTA for full documentation 

        ]]>
    </help>
    <expand macro="citations" />
</tool>