changeset 0:8001319743c0 draft

planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/celesta commit 0ec46718dfd00f37ccae4e2fa133fa8393fe6d92
author goeckslab
date Wed, 28 Aug 2024 12:46:48 +0000
parents
children 44d4c885d9b5
files celesta.xml celesta_assign_cells.R celesta_plot_cells.R celesta_plot_expression.R macros.xml test-data/CD31_VASCULATURE_CYC_19_CH_3_exp_prob.png test-data/celesta_high_exp_thresholds.csv test-data/celesta_image.h5ad test-data/celesta_prior.csv test-data/plot_cells_vasculature.png
diffstat 10 files changed, 680 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/celesta.xml	Wed Aug 28 12:46:48 2024 +0000
@@ -0,0 +1,270 @@
+<tool id="celesta" name="CELESTA cell typing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Cell type identification with spatial information</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="celesta_requirements"/>
+    <expand macro="macro_stdio" />
+    <version_command>echo "@VERSION@"</version_command>
+    <command detect_errors="aggressive">
+        <![CDATA[
+        #if str($runmode.selected_mode) == 'plot_expression':
+            Rscript '$__tool_directory__/celesta_plot_expression.R'
+                --imagingdata '$anndata'
+                --prior '$prior_info'
+                --xcol '$x_coord'
+                --ycol '$y_coord'
+                --size '$test_size' 
+                --height '$height' 
+                --width '$width' 
+        #if str($filter_cells.filter) == 'filter':
+                --filter
+                --lowfilter '$low_threshold'
+                --highfilter '$high_threshold'
+        #end if
+        #else if str($runmode.selected_mode) == 'assign_cells':
+            Rscript '$__tool_directory__/celesta_assign_cells.R'
+                --imagingdata '$anndata'
+                --prior '$prior_info'
+                --xcol '$x_coord' 
+                --ycol '$y_coord'  
+                --maxiteration '$max_iteration' 
+                --changethresh '$cell_change_threshold' 
+            #if str($filter_cells.filter) == 'filter':
+                    --filter
+                    --lowfilter '$low_threshold'
+                    --highfilter '$high_threshold'
+            #end if
+            #if $low_thresholds_file:
+                    --lowexpthresh '$low_thresholds_file'
+            #end if
+            #if $high_thresholds_file:
+                    --highexpthresh '$high_thresholds_file'
+            #end if  
+            #for $p in $plot_cells:
+                && Rscript '$__tool_directory__/celesta_plot_cells.R'
+                    --prior '$prior_info'
+                    --celltypes '${p.cell_types}'
+                    --size '$p.test_size' 
+                    --height '$p.height' 
+                    --width '$p.width' 
+                    --dpi '$p.dpi' 
+            #end for
+        #end if
+        ]]>
+    </command>
+    <configfiles>
+        <inputs name="inputs" />
+    </configfiles>
+    <inputs>
+        <param name="anndata" type="data" format="h5ad" label="Input anndata" />
+        <param name="prior_info" type="data" format="csv" label="Cell-type signature matrix" />
+        <conditional name="runmode">
+            <param name="selected_mode" type="select" label="Select which CELESTA mode to run">
+                <option value="plot_expression" selected="true">Plot expression probabilities for markers in the cell type signature matrix</option>
+                <option value="assign_cells">Run the cell type assignment</option>
+            </param>
+            <when value="plot_expression">
+                <expand macro="celesta_base_options" />
+                <section name="figure_options" title="Figure Options" expanded="true">
+                    <param argument="test_size" type="float" value="1" min="0.1" max="10" label="Specify the point size for plotting cells" />
+                    <param argument="height" type="integer" value="4" min="4" max="20" label="Specify the height of the figure (inches)" />
+                    <param argument="width" type="integer" value="5" min="4" max="20" label="Specify the width of the figure (inches)" />
+                </section>
+            </when>
+            <when value="assign_cells">
+                <expand macro="celesta_base_options" />
+                <section name="options" title="Advanced Options" expanded="false">
+                    <param argument="max_iteration" type="integer" value="10" label="Define the maximum iterations allowed in the EM algorithm per round" />
+                    <param argument="cell_change_threshold" type="float" value="0.01" label="Define an ending condition for the EM algorithm" help="0.01 means that when fewer than 1% of the total number of cells do not change identity, the algorithm will stop" />
+                    <param name="low_thresholds_file" type="data" format="csv" optional="true" label="Provide a file mapping low anchor and index cell assignment thresholds to cell types" />
+                    <param name="high_thresholds_file" type="data" format="csv" optional="true" label="Provide a file mapping high anchor and index cell assignment thresholds to cell types" />
+                    <param name="save_rds" type="boolean" checked="false" label="Also save CELESTA object as RDS file" help="Saving CELESTA object as RDS can allow for easier downstream analysis in R" />
+                </section>
+                <repeat name="plot_cells" title="Plot combinations of resulting cell type assignments" min="0">
+                    <param name="cell_types" type="text" label="Provide a comma-separated list of cell type names to plot together">                    
+                        <sanitizer>
+                            <valid initial="string.printable"/>
+                        </sanitizer>
+                    </param>
+                    <param argument="test_size" type="float" value="1" min="0.1" max="10" label="Specify the point size for plotting cells" />
+                    <param argument="height" type="integer" value="12" min="4" max="20" label="Specify the height of the figure (inches)" />
+                    <param argument="width" type="integer" value="12" min="4" max="20" label="Specify the width of the figure (inches)" />
+                    <param argument="dpi" type="integer" value="300" min="50" max="500" label="Specify the DPI of the figure" />
+                </repeat>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <collection name="marker_expression_plots" type="list" label="Marker expression probability plots">
+            <discover_datasets pattern="__name_and_ext__" directory="marker_exp_plots" ext="png" />
+            <filter>runmode['selected_mode'] == "plot_expression"</filter>
+        </collection>     
+        <data name="assign_cells_output" format="h5ad" label="CELESTA assign cells output" from_work_dir="result.h5ad" >
+            <filter>runmode['selected_mode'] == "assign_cells"</filter>
+        </data>
+        <data name="assign_cells_rds" format="rds" label="CELESTA object RDS" from_work_dir="celestaobj.rds" >
+            <filter>runmode['selected_mode'] == "assign_cells" and runmode['options']['save_rds']</filter>
+        </data>
+        <collection name="cell_assign_plots" type="list" label="Cell assignment plots">
+            <discover_datasets pattern="__name_and_ext__" directory="cell_assign_plots" ext="png" />
+            <filter>runmode['selected_mode'] == "assign_cells" and len(runmode['plot_cells']) != 0</filter>
+        </collection>     
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="anndata" value="celesta_image.h5ad" />
+            <param name="prior_info" value="celesta_prior.csv" />
+            <conditional name="runmode">
+                <param name="selected_mode" value="plot_expression" />
+            </conditional>
+            <output_collection name="marker_expression_plots" type="list" count="18">
+                <element name="CD31_VASCULATURE_CYC_19_CH_3_exp_prob" file="CD31_VASCULATURE_CYC_19_CH_3_exp_prob.png" compare="sim_size" />
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="anndata" value="celesta_image.h5ad" />
+            <param name="prior_info" value="celesta_prior.csv" />
+            <conditional name="runmode">
+                <param name="selected_mode" value="assign_cells" />
+            </conditional>
+            <output name="assign_cells_output">
+                <assert_contents>
+                    <has_h5_keys keys="obs/celesta_final_cell_type" />
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text text="vasculature 273" />
+            </assert_stdout>
+        </test>
+        <test expect_num_outputs="3">
+            <param name="anndata" value="celesta_image.h5ad" />
+            <param name="prior_info" value="celesta_prior.csv" />
+            <param name="filter" value="filter" />
+            <conditional name="runmode">
+                <param name="selected_mode" value="assign_cells" />
+            </conditional>
+            <param name="high_thresholds_file" value="celesta_high_exp_thresholds.csv" />
+            <repeat name="plot_cells">
+                <param name="cell_types" value="vasculature" />
+            </repeat>
+            <param name="save_rds" value="true" />
+            <output name="assign_cells_output">
+                <assert_contents>
+                    <has_h5_keys keys="obs/celesta_final_cell_type" />
+                </assert_contents>
+            </output>
+            <output_collection name="cell_assign_plots" type="list" count="1">
+                <element name="plot_cells_vasculature" file="plot_cells_vasculature.png" compare="sim_size" />
+            </output_collection>
+            <output name="assign_cells_rds">
+                <assert_contents>
+                    <has_size value="1400000" delta="100000" />
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_text text="vasculature 168" />
+            </assert_stdout>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+**What it does**
+
+CELESTA (CELl typE identification with SpaTiAl information) is an algorithm aiming to perform
+automated cell type identification for multiplexed in situ imaging data. 
+
+CELESTA makes use of both protein expressions and cell spatial neighborhood information 
+from segmented imaging data for the cell type identification.
+
+This Galaxy implementation of CELESTA has two run modes:
+
+**Both run modes share the following inputs**
+
+`Input Anndata` -- anndata h5ad file where cells are rows, with marker expression in adata.X and cell coordinates in adata.obs 
+
+`Cell-type signature matrix` -- Comma-separated text file containing the following information and formatting: 
+
+(1) The first column has to contain the cell types to be inferred
+
+(2) The second column has the lineage information for each cell type. The lineage information has three numbers 
+    connected by “_” (underscore). The first number indicates round. Cell types with the same lineage level are 
+    inferred at the same round. Increasing number indicates increasing cell-type resolution. For example, 
+    immune cells -> CD3+ T cells –> CD4+ T cells. The third number is a number assigned to the cell type, 
+    i.e, cell type number. The middle number tells the previous lineage cell type number for the current cell type. 
+    For example, the middle number for CD3+ T cells is 5, because it is a subtype of immune cells which have cell 
+    type number assigned to 5.
+
+(3) Starting from column three, each column is a protein marker. If the protein marker is known to be expressed 
+    for that cell type, then it is denoted by “1”. If the protein marker is known to not express for a cell type, 
+    then it is denoted by “0”. If the protein marker is irrelevant or uncertain to express for a cell type, 
+    then it is denoted by “NA”.
+
+`Name of anndata.obs key containing cell or nucleus centroid X position` -- if using output from MCMICRO, this would be 'X_centroid'
+
+`Name of anndata.obs key containing cell or nucleus centroid Y position` -- if using output from MCMICRO, this would be 'Y_centroid'
+
+`Choose whether to filter cells` -- Boolean whether to filter out cells with extreme low or high marker intensity that fall outside of thresholds (`CELESTA::FilterCells()`)
+
+`Set the low threshold for filtering cells` -- high_marker_threshold param in `CELESTA::FilterCells()`
+
+`Set the high threshold for filtering cells` -- low_marker_threshold param in `CELESTA::FilterCells()`
+
+**Run modes**
+
+1. Plot expression probabilities for markers in the cell type signature matrix
+
+    This run mode generates marker expression probability plots for every marker in the cell-type signature matrix. 
+
+    **Additional inputs**
+
+    `Specify the point size for plotting cells` -- passed to `ggplot2::geom_point()` size param
+
+    `Specify the height of the figure (inches)` -- passed to `ggplot2::ggsave()` height param 
+
+    `Specify the width of the figure (inches)` -- passed to `ggplot2::ggsave()` width param 
+
+    **Outputs**
+
+    Collection of `.png` figures showing marker intensity probabilities as spatial scatter plots
+
+2. Run the cell type assignment
+
+    **Additional inputs**
+
+    `Define the maximum iterations allowed in the EM algorithm per round` -- passed to `CELESTA::AssignCells()` max_iteration param
+
+    `Define an ending condition for the EM algorithm` -- passed to `CELESTA::AssignCells()` cell_change_threshold param 
+
+    `Provide a file mapping low/high anchor and index cell assignment thresholds to cell types` -- comma separated text file containing following information and formatting:
+
+(1) First column contains cell types to be inferred (same order as the cell type signature matrix)
+    Second column is named `anchor` and contains high or low thresholds for anchor cells 
+    Third column is named `index` and contains high or low thresholds for index cells 
+
+(2) In the `CELESTA::AssignCells()` function, it requires four vectors to define the high and low thresholds for each cell type. The length of the vector equals to the 
+    total number of cell types defined in the cell-type signature matrix. We would suggest start with the default thresholds and modify them by comparing the results 
+    with the original staining. The two vectors are required for defining the “high_expression_threshold”, one for anchor cells and one for index cells (non-anchor cells). 
+    The thresholds define how much the marker expression probability is in order to be considered as expressed.
+
+(3) For the low thresholds, Normally 1 is assigned to this value unless there are a lot of doublets or co-staining in the data. The Low expression threshold default 
+    values in general are robust, and thus we recommend testing the High expression threshold values.
+
+`Also save CELESTA object as RDS file` -- Boolean whether to output an RDS file in addition to the default h5ad output 
+
+`Plot combinations of resulting cell type assignments` -- specify any combination of cell types from the cell type signature matrix to plot. This is a repeat element, and one plot will be generated per repitition. There are additional params to control plot aesthetic attributes
+
+**Outputs**
+
+`CELESTA assign cells output` -- The primary output, an h5ad file, with new columns containing cell type information. New columns are prepended with `celesta_`
+
+`CELESTA object RDS` -- optionally output CELESTA object as RDS for downstream analysis in R 
+
+Optional collection of `.png` figures of spatial scatter plots color annotated by cell type assignment 
+
+Visit github.com/plevritis-lab/CELESTA for full documentation 
+
+        ]]>
+    </help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/celesta_assign_cells.R	Wed Aug 28 12:46:48 2024 +0000
@@ -0,0 +1,153 @@
+# ---------------------------------------------------------------------------------
+# The main algorithim for CELESTA cell type assignment
+# ---------------------------------------------------------------------------------
+
+suppressWarnings(suppressMessages(library(janitor)))
+suppressWarnings(suppressMessages(library(optparse)))
+suppressWarnings(suppressMessages(library(dplyr)))
+suppressWarnings(suppressMessages(library(anndataR)))
+suppressWarnings(suppressMessages(library(Rmixmod)))
+suppressWarnings(suppressMessages(library(spdep)))
+suppressWarnings(suppressMessages(library(ggplot2)))
+suppressWarnings(suppressMessages(library(reshape2)))
+suppressWarnings(suppressMessages(library(zeallot)))
+suppressWarnings(suppressMessages(library(CELESTA)))
+
+### Define command line arguments
+option_list <- list(
+  make_option(c("-i", "--imagingdata"), action = "store", default = NA, type = "character",
+              help = "Path to imaging data"),
+  make_option(c("-p", "--prior"), action = "store", default = NA, type = "character",
+              help = "Path to prior marker info file"),
+  make_option(c("-x", "--xcol"), action = "store", default = NA, type = "character",
+              help = "Name of column in adata.obs containing X coordinate"),
+  make_option(c("-y", "--ycol"), action = "store", default = NA, type = "character",
+              help = "Name of column in adata.obs containing Y coordinate"),
+  make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE,
+              help = "Boolean to filter cells or not (default: no filtering)"),
+  make_option(c("--highfilter"), action = "store", default = 0.9, type = "double",
+              help = "High marker threshold if filtering cells (default: 0.9)"),
+  make_option(c("--lowfilter"), action = "store", default = 0.4, type = "double",
+              help = "Low marker threshold if filtering cells (default: 0.4)"),
+  make_option(c("--maxiteration"), action = "store", default = 10, type = "integer",
+              help = "Maximum iterations allowed in the EM algorithm per round"),
+  make_option(c("--changethresh"), action = "store", default = 0.01, type = "double",
+              help = "Ending condition for the EM algorithm"),
+  make_option(c("--highexpthresh"), action = "store", default = "default_high_thresholds", type = "character",
+              help = "Path to file specifying high expression thresholds for anchor and index cells"),
+  make_option(c("--lowexpthresh"), action = "store", default = "default_low_thresholds", type = "character",
+              help = "Path to file specifying low expression thresholds for anchor and index cells")
+)
+
+### Functions
+anndata_to_celesta <- function(input_adata, x_col, y_col) {
+
+  #' Function to convert anndata object to dataframe readable by CELESTA
+  #' Coordinates columns in adata.obs are renamed to "X" and "Y", and placed at beginning of dataframe
+  #' Marker intensities are concatted columnwise to the X and Y coords. cols: X,Y,Marker_1,...Marker N
+
+  # initialize output as dataframe from adata.obs
+  celesta_input_dataframe <- data.frame(input_adata$obs)
+
+  # subset to X and Y coordinates from obs only
+  celesta_input_dataframe <- celesta_input_dataframe %>%
+    dplyr::select({{x_col}}, {{y_col}})
+
+  # rename X,Y column names to what CELESTA wants
+  colnames(celesta_input_dataframe) <- c("X", "Y")
+
+  # merge X,Y coords with marker intensities from adata.X
+  x_df <- data.frame(input_adata$X)
+  colnames(x_df) <- input_adata$var_names
+  celesta_input_dataframe <- cbind(celesta_input_dataframe, x_df)
+
+  return(celesta_input_dataframe)
+}
+
+### Main
+# parse args
+opt <- parse_args(OptionParser(option_list = option_list))
+
+# read anndata, convert to celesta format
+adata <- read_h5ad(opt$imagingdata)
+celesta_input_df <- anndata_to_celesta(adata, x_col = opt$xcol, y_col = opt$ycol)
+
+# read prior marker info
+prior <- read.csv(opt$prior, check.names = FALSE)
+
+# clean prior names and input dataframe names
+prior <- janitor::clean_names(prior, case = "all_caps")
+celesta_input_df <- janitor::clean_names(celesta_input_df, case = "all_caps")
+
+# instantiate celesta object
+CelestaObj <- CreateCelestaObject(
+  project_title = "",
+  prior_marker_info = prior,
+  imaging_data_file = celesta_input_df
+)
+
+# if filtering is specified, filter out cells outside high and low thresholds
+if (opt$filter) {
+  print("filtering cells based on expression")
+  CelestaObj <- FilterCells(CelestaObj,
+                            high_marker_threshold = opt$highfilter,
+                            low_marker_threshold = opt$lowfilter)
+} else {
+  print("Proceeding to cell type assignment without cell filtering")
+}
+
+# check for non-default expression threshold files
+if (opt$highexpthresh != "default_high_thresholds") {
+  # read high thresholds
+  print("Using custom high expression thresholds")
+  high_expression_thresholds <- read.csv(opt$highexpthresh)
+  hi_exp_thresh_anchor <- high_expression_thresholds$anchor
+  hi_exp_thresh_index <- high_expression_thresholds$index
+} else {
+  print("Using default high expression thresholds -- this may need adjustment")
+  hi_exp_thresh_anchor <- rep(0.7, length = 50)
+  hi_exp_thresh_index <- rep(0.5, length = 50)
+}
+
+if (opt$lowexpthresh != "default_low_thresholds") {
+  # read low thresholds
+  print("Using custom low expression thresholds")
+  low_expression_thresholds <- read.csv(opt$highexpthresh)
+  low_exp_thresh_anchor <- low_expression_thresholds$anchor
+  low_exp_thresh_index <- low_expression_thresholds$index
+} else {
+  print("Using default low expression thresholds")
+  low_exp_thresh_anchor <- rep(0.9, length = 50)
+  low_exp_thresh_index <- rep(1, length = 50)
+}
+
+# run cell type assignment
+CelestaObj <- AssignCells(CelestaObj,
+                          max_iteration = opt$maxiteration,
+                          cell_change_threshold = opt$changethresh,
+                          high_expression_threshold_anchor = hi_exp_thresh_anchor,
+                          low_expression_threshold_anchor = low_exp_thresh_anchor,
+                          high_expression_threshold_index = hi_exp_thresh_index,
+                          low_expression_threshold_index = low_exp_thresh_index,
+                          save_result = FALSE)
+
+# save object as an RDS file for cell type plotting
+# for the time being, this is not exposed to Galaxy
+saveRDS(CelestaObj, file = "celestaobj.rds")
+
+# rename celesta assignment columns so they are obvious in output anndata
+celesta_assignments <- CelestaObj@final_cell_type_assignment
+celesta_assignments <- janitor::clean_names(celesta_assignments)
+colnames(celesta_assignments) <- paste0("celesta_", colnames(celesta_assignments))
+
+# merge celesta assignments into anndata object
+adata$obs <- cbind(adata$obs, celesta_assignments)
+
+# print cell type value_counts to standard output
+print("----------------------------------------")
+print("Final cell type counts")
+print(adata$obs %>% dplyr::count(celesta_final_cell_type, sort = TRUE))
+print("----------------------------------------")
+
+# write output anndata file
+write_h5ad(adata, "result.h5ad")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/celesta_plot_cells.R	Wed Aug 28 12:46:48 2024 +0000
@@ -0,0 +1,76 @@
+# ---------------------------------------------------------------------------------
+# Plot assigned cell type combinations with CELESTA
+# ---------------------------------------------------------------------------------
+
+suppressWarnings(suppressMessages(library(janitor)))
+suppressWarnings(suppressMessages(library(optparse)))
+suppressWarnings(suppressMessages(library(dplyr)))
+suppressWarnings(suppressMessages(library(anndataR)))
+suppressWarnings(suppressMessages(library(Rmixmod)))
+suppressWarnings(suppressMessages(library(spdep)))
+suppressWarnings(suppressMessages(library(ggplot2)))
+suppressWarnings(suppressMessages(library(reshape2)))
+suppressWarnings(suppressMessages(library(zeallot)))
+suppressWarnings(suppressMessages(library(CELESTA)))
+
+# define command line args
+option_list <- list(
+  make_option(c("-r", "--rds"), action = "store", default = "celestaobj.rds", type = "character",
+              help = "Path to CelestaObj RDS"),
+  make_option(c("-p", "--prior"), action = "store", default = NA, type = "character",
+              help = "Path to prior marker info file"),
+  make_option(c("-c", "--celltypes"), action = "store", default = NA, type = "character",
+              help = "Comma-separated list of cell types to plot"),
+  make_option(c("-s", "--size"), action = "store", default = 1, type = "double",
+              help = "Point size for plotting"),
+  make_option(c("--width"), action = "store", default = 12, type = "integer",
+              help = "Width of plot (inches)"),
+  make_option(c("--height"), action = "store", default = 12, type = "integer",
+              help = "Height of plot (inches)"),
+  make_option(c("--dpi"), action = "store", default = 300, type = "integer",
+              help = "DPI (dots per inch) of plot")
+)
+
+# parse args
+opt <- parse_args(OptionParser(option_list = option_list))
+
+CelestaObj <- readRDS(opt$rds)
+cell_types_to_plot <- strsplit(opt$celltypes, ",")[[1]]
+
+# read prior marker info
+prior <- read.csv(opt$prior, row.names = 1)
+
+# get indices of cell types to plot from the prior marker table
+cell_type_indices <- which(row.names(prior) %in% cell_types_to_plot)
+
+print(cell_types_to_plot)
+print(cell_type_indices)
+
+print(row.names(prior))
+
+# create output directory if it doesn"t already exist
+dir.create("cell_assign_plots")
+
+# create the cell type plot
+g <- PlotCellsAnyCombination(cell_type_assignment_to_plot = CelestaObj@final_cell_type_assignment[, (CelestaObj@total_rounds + 1)],
+                             coords = CelestaObj@coords,
+                             prior_info = prior_marker_info,
+                             cell_number_to_use = cell_type_indices,
+                             test_size = 1,
+                             save_plot = FALSE)
+
+# create a unique output name for the plot based on the input cell types
+cell_types_cleaned <- paste(make_clean_names(cell_types_to_plot), collapse = "")
+output_name <- paste(c("plot_cells_", cell_types_cleaned, ".png"), collapse = "")
+
+# save to subdir
+# FIXME: may want to expose plotting params to galaxy
+ggsave(
+  path = "cell_assign_plots",
+  filename = output_name,
+  plot = g,
+  width = opt$width,
+  height = opt$height,
+  units = "in",
+  dpi = opt$dpi
+)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/celesta_plot_expression.R	Wed Aug 28 12:46:48 2024 +0000
@@ -0,0 +1,108 @@
+# --------------------------------------------------------------------------------------------
+# Plot marker expression probabilities for cell assignment parameter optimization with CELESTA
+# --------------------------------------------------------------------------------------------
+
+suppressWarnings(suppressMessages(library(janitor)))
+suppressWarnings(suppressMessages(library(optparse)))
+suppressWarnings(suppressMessages(library(dplyr)))
+suppressWarnings(suppressMessages(library(anndataR)))
+suppressWarnings(suppressMessages(library(Rmixmod)))
+suppressWarnings(suppressMessages(library(spdep)))
+suppressWarnings(suppressMessages(library(ggplot2)))
+suppressWarnings(suppressMessages(library(reshape2)))
+suppressWarnings(suppressMessages(library(zeallot)))
+suppressWarnings(suppressMessages(library(CELESTA)))
+
+### Define command line arguments
+option_list <- list(
+  make_option(c("-i", "--imagingdata"), action = "store", default = NA, type = "character",
+              help = "Path to imaging data"),
+  make_option(c("-p", "--prior"), action = "store", default = NA, type = "character",
+              help = "Path to prior marker info file"),
+  make_option(c("-x", "--xcol"), action = "store", default = NA, type = "character",
+              help = "Name of column in adata.obs containing X coordinate"),
+  make_option(c("-y", "--ycol"), action = "store", default = NA, type = "character",
+              help = "Name of column in adata.obs containing Y coordinate"),
+  make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE,
+              help = "Boolean to filter cells or not (default: no filtering)"),
+  make_option(c("--highfilter"), action = "store", default = 0.9, type = "double",
+              help = "High marker threshold if filtering cells (default: 0.9)"),
+  make_option(c("--lowfilter"), action = "store", default = 0.4, type = "double",
+              help = "Low marker threshold if filtering cells (default: 0.4)"),
+  make_option(c("-s", "--size"), action = "store", default = 1, type = "double",
+              help = "Point size for plotting"),
+  make_option(c("--width"), action = "store", default = 5, type = "integer",
+              help = "Width of plot (inches)"),
+  make_option(c("--height"), action = "store", default = 4, type = "integer",
+              help = "Height of plot (inches)")
+)
+
+### Functions
+anndata_to_celesta <- function(input_adata, x_col, y_col) {
+
+  #' Function to convert anndata object to dataframe readable by CELESTA
+  #' Coordinates columns in adata.obs are renamed to "X" and "Y", and placed at beginning of dataframe
+  #' Marker intensities are concatted columnwise to the X and Y coords. cols: X,Y,Marker_1,...Marker N
+
+  # initialize output as dataframe from adata.obs
+  celesta_input_dataframe <- data.frame(input_adata$obs)
+
+  # subset to X and Y coordinates from obs only
+  celesta_input_dataframe <- celesta_input_dataframe %>%
+    dplyr::select({{x_col}}, {{y_col}})
+
+  # rename X,Y column names to what CELESTA wants
+  colnames(celesta_input_dataframe) <- c("X", "Y")
+
+  # merge X,Y coords with marker intensities from adata.X
+  x_df <- data.frame(input_adata$X)
+  colnames(x_df) <- input_adata$var_names
+  celesta_input_dataframe <- cbind(celesta_input_dataframe, x_df)
+
+  return(celesta_input_dataframe)
+}
+
+### Main
+# parse args
+opt <- parse_args(OptionParser(option_list = option_list))
+
+# read anndata, convert to celesta format
+adata <- read_h5ad(opt$imagingdata)
+celesta_input_df <- anndata_to_celesta(adata, x_col = opt$xcol, y_col = opt$ycol)
+
+# read prior marker info
+prior <- read.csv(opt$prior, check.names = FALSE)
+
+# clean prior names, keeping a copy of originals for writing output
+prior_original_names <- colnames(prior)
+prior <- janitor::clean_names(prior, case = "all_caps")
+
+# clean input dataframe names
+celesta_input_df <- janitor::clean_names(celesta_input_df, case = "all_caps")
+
+# instantiate celesta object
+CelestaObj <- CreateCelestaObject(
+  project_title = "",
+  prior_marker_info = prior,
+  imaging_data_file = celesta_input_df
+)
+
+# if filtering is specified, filter out cells outside high and low thresholds
+if (opt$filter) {
+  print("filtering cells based on expression")
+  CelestaObj <- FilterCells(CelestaObj,
+                            high_marker_threshold = opt$highfilter,
+                            low_marker_threshold = opt$lowfilter)
+} else {
+  print("Proceeding to marker expression plotting without cell filtering")
+}
+
+# create output directory if it does not already exist
+dir.create("marker_exp_plots")
+
+# plot expression probability
+PlotExpProb(coords = CelestaObj@coords,
+            marker_exp_prob = CelestaObj@marker_exp_prob,
+            prior_marker_info = CelestaObj@prior_info,
+            save_plot = TRUE,
+            output_dir = "./marker_exp_plots")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Wed Aug 28 12:46:48 2024 +0000
@@ -0,0 +1,39 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.0.0.9</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">20.01</token>
+
+    <xml name="celesta_requirements">
+        <requirements>
+            <container type="docker">quay.io/goeckslab/celesta:@TOOL_VERSION@</container>
+            <yield />
+        </requirements>
+    </xml>
+
+    <xml name="macro_stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" />
+        </stdio>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-022-01498-z</citation>
+        </citations>
+    </xml>
+    <xml name="celesta_base_options" token_label="celesta_base_options">
+        <param name="x_coord" type="text" value="X_centroid" optional="false" label="Name of anndata.obs key containing cell or nucleus centroid X position" />
+        <param name="y_coord" type="text" value="Y_centroid" optional="false" label="Name of anndata.obs key containing cell or nucleus centroid Y position" />
+        <conditional name="filter_cells">
+            <param name="filter" type="select" label="Choose whether to filter cells" help="FilterCells">
+                <option value="no_filter" selected="true">Do not filter cells</option>
+                <option value="filter">Filter cells based on marker intensity</option>
+            </param>
+            <when value="no_filter">
+            </when>
+            <when value="filter">
+                <param name="low_threshold" type="float" value="0.4" optional="false" label="Set the low threshold for filtering cells" help="Cells below low threshold will be filtered out" />
+                <param name="high_threshold" type="float" value="0.9" optional="false" label="Set the high threshold for filtering cells" help="Cells above high threshold will be filtered out" />
+            </when>
+        </conditional>
+    </xml>
+</macros>
Binary file test-data/CD31_VASCULATURE_CYC_19_CH_3_exp_prob.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/celesta_high_exp_thresholds.csv	Wed Aug 28 12:46:48 2024 +0000
@@ -0,0 +1,17 @@
+,anchor,index
+vasculature,0.9,0.85
+tumor cells,0,0
+aSMA+ stroma,0.8,0.8
+lymphatics,0.95,0.95
+immune cells,0.1,0
+CD3+ T cells,0.6,0.6
+CD15+ granulocytes,0.7,0.7
+B cells,0.95,0.95
+CD11c+ DCs,1,1
+CD68+CD163+ macrophages,0.7,0.6
+plasma cells,0.7,0.7
+NK cells,0.95,0.95
+CD8+ T cells,0.7,0.7
+CD4+ T cells,0.7,0.7
+CD4+ T cells CD45RO+,0.9,0.7
+Tregs,0.9,0.9
\ No newline at end of file
Binary file test-data/celesta_image.h5ad has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/celesta_prior.csv	Wed Aug 28 12:46:48 2024 +0000
@@ -0,0 +1,17 @@
+,Lineage_level,CD31 - vasculature:Cyc_19_ch_3,CD34 - vasculature:Cyc_20_ch_3,Cytokeratin - epithelia:Cyc_10_ch_2,aSMA - smooth muscle:Cyc_11_ch_2,Podoplanin - lymphatics:Cyc_19_ch_4,CD45 - hematopoietic cells:Cyc_4_ch_2,CD15 - granulocytes:Cyc_14_ch_2,CD3 - T cells:Cyc_16_ch_4,CD20 - B cells:Cyc_8_ch_3,CD11c - DCs:Cyc_12_ch_3,CD163 - macrophages:Cyc_17_ch_3,CD68 - macrophages:Cyc_18_ch_4,CD38 - multifunctional:Cyc_20_ch_4,CD56 - NK cells:Cyc_10_ch_4,CD8 - cytotoxic T cells:Cyc_3_ch_2,CD4 - T helper cells:Cyc_6_ch_3,CD45RO - memory cells:Cyc_18_ch_3,FOXP3 - regulatory T cells:Cyc_2_ch_3
+vasculature,1_0_1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+tumor cells,1_0_2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+aSMA+ stroma,1_0_3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+lymphatics,1_0_4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
+immune cells,1_0_5,0,0,0,0,0,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
+CD3+ T cells,2_5_6,0,0,0,0,0,NA,0,1,0,0,0,0,0,0,NA,NA,NA,NA
+CD15+ granulocytes,2_5_7,0,0,0,0,0,NA,1,0,0,0,0,NA,0,0,0,0,NA,0
+B cells,2_5_8,0,0,0,0,0,NA,0,0,1,0,0,0,0,0,0,0,0,0
+CD11c+ DCs,2_5_9,0,0,0,0,0,NA,0,0,0,1,0,0,0,0,0,0,0,0
+CD68+CD163+ macrophages,2_5_10,0,0,0,0,0,NA,0,0,0,0,1,1,0,0,0,0,NA,0
+plasma cells,2_5_11,0,0,0,0,0,NA,0,0,0,0,0,0,1,0,0,0,0,0
+NK cells,2_5_12,0,0,0,0,0,NA,0,NA,0,0,0,0,0,1,0,0,0,0
+CD8+ T cells,3_6_13,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,1,0,NA,0
+CD4+ T cells,3_6_14,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,1,NA,NA
+CD4+ T cells CD45RO+,4_14_15,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,NA,1,0
+Tregs,4_14_16,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,NA,0,1
\ No newline at end of file
Binary file test-data/plot_cells_vasculature.png has changed