Mercurial > repos > goeckslab > celesta
changeset 0:8001319743c0 draft
planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/celesta commit 0ec46718dfd00f37ccae4e2fa133fa8393fe6d92
author | goeckslab |
---|---|
date | Wed, 28 Aug 2024 12:46:48 +0000 |
parents | |
children | 44d4c885d9b5 |
files | celesta.xml celesta_assign_cells.R celesta_plot_cells.R celesta_plot_expression.R macros.xml test-data/CD31_VASCULATURE_CYC_19_CH_3_exp_prob.png test-data/celesta_high_exp_thresholds.csv test-data/celesta_image.h5ad test-data/celesta_prior.csv test-data/plot_cells_vasculature.png |
diffstat | 10 files changed, 680 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/celesta.xml Wed Aug 28 12:46:48 2024 +0000 @@ -0,0 +1,270 @@ +<tool id="celesta" name="CELESTA cell typing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>Cell type identification with spatial information</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="celesta_requirements"/> + <expand macro="macro_stdio" /> + <version_command>echo "@VERSION@"</version_command> + <command detect_errors="aggressive"> + <![CDATA[ + #if str($runmode.selected_mode) == 'plot_expression': + Rscript '$__tool_directory__/celesta_plot_expression.R' + --imagingdata '$anndata' + --prior '$prior_info' + --xcol '$x_coord' + --ycol '$y_coord' + --size '$test_size' + --height '$height' + --width '$width' + #if str($filter_cells.filter) == 'filter': + --filter + --lowfilter '$low_threshold' + --highfilter '$high_threshold' + #end if + #else if str($runmode.selected_mode) == 'assign_cells': + Rscript '$__tool_directory__/celesta_assign_cells.R' + --imagingdata '$anndata' + --prior '$prior_info' + --xcol '$x_coord' + --ycol '$y_coord' + --maxiteration '$max_iteration' + --changethresh '$cell_change_threshold' + #if str($filter_cells.filter) == 'filter': + --filter + --lowfilter '$low_threshold' + --highfilter '$high_threshold' + #end if + #if $low_thresholds_file: + --lowexpthresh '$low_thresholds_file' + #end if + #if $high_thresholds_file: + --highexpthresh '$high_thresholds_file' + #end if + #for $p in $plot_cells: + && Rscript '$__tool_directory__/celesta_plot_cells.R' + --prior '$prior_info' + --celltypes '${p.cell_types}' + --size '$p.test_size' + --height '$p.height' + --width '$p.width' + --dpi '$p.dpi' + #end for + #end if + ]]> + </command> + <configfiles> + <inputs name="inputs" /> + </configfiles> + <inputs> + <param name="anndata" type="data" format="h5ad" label="Input anndata" /> + <param name="prior_info" type="data" format="csv" label="Cell-type signature matrix" /> + <conditional name="runmode"> + <param name="selected_mode" type="select" label="Select which CELESTA mode to run"> + <option value="plot_expression" selected="true">Plot expression probabilities for markers in the cell type signature matrix</option> + <option value="assign_cells">Run the cell type assignment</option> + </param> + <when value="plot_expression"> + <expand macro="celesta_base_options" /> + <section name="figure_options" title="Figure Options" expanded="true"> + <param argument="test_size" type="float" value="1" min="0.1" max="10" label="Specify the point size for plotting cells" /> + <param argument="height" type="integer" value="4" min="4" max="20" label="Specify the height of the figure (inches)" /> + <param argument="width" type="integer" value="5" min="4" max="20" label="Specify the width of the figure (inches)" /> + </section> + </when> + <when value="assign_cells"> + <expand macro="celesta_base_options" /> + <section name="options" title="Advanced Options" expanded="false"> + <param argument="max_iteration" type="integer" value="10" label="Define the maximum iterations allowed in the EM algorithm per round" /> + <param argument="cell_change_threshold" type="float" value="0.01" label="Define an ending condition for the EM algorithm" help="0.01 means that when fewer than 1% of the total number of cells do not change identity, the algorithm will stop" /> + <param name="low_thresholds_file" type="data" format="csv" optional="true" label="Provide a file mapping low anchor and index cell assignment thresholds to cell types" /> + <param name="high_thresholds_file" type="data" format="csv" optional="true" label="Provide a file mapping high anchor and index cell assignment thresholds to cell types" /> + <param name="save_rds" type="boolean" checked="false" label="Also save CELESTA object as RDS file" help="Saving CELESTA object as RDS can allow for easier downstream analysis in R" /> + </section> + <repeat name="plot_cells" title="Plot combinations of resulting cell type assignments" min="0"> + <param name="cell_types" type="text" label="Provide a comma-separated list of cell type names to plot together"> + <sanitizer> + <valid initial="string.printable"/> + </sanitizer> + </param> + <param argument="test_size" type="float" value="1" min="0.1" max="10" label="Specify the point size for plotting cells" /> + <param argument="height" type="integer" value="12" min="4" max="20" label="Specify the height of the figure (inches)" /> + <param argument="width" type="integer" value="12" min="4" max="20" label="Specify the width of the figure (inches)" /> + <param argument="dpi" type="integer" value="300" min="50" max="500" label="Specify the DPI of the figure" /> + </repeat> + </when> + </conditional> + </inputs> + <outputs> + <collection name="marker_expression_plots" type="list" label="Marker expression probability plots"> + <discover_datasets pattern="__name_and_ext__" directory="marker_exp_plots" ext="png" /> + <filter>runmode['selected_mode'] == "plot_expression"</filter> + </collection> + <data name="assign_cells_output" format="h5ad" label="CELESTA assign cells output" from_work_dir="result.h5ad" > + <filter>runmode['selected_mode'] == "assign_cells"</filter> + </data> + <data name="assign_cells_rds" format="rds" label="CELESTA object RDS" from_work_dir="celestaobj.rds" > + <filter>runmode['selected_mode'] == "assign_cells" and runmode['options']['save_rds']</filter> + </data> + <collection name="cell_assign_plots" type="list" label="Cell assignment plots"> + <discover_datasets pattern="__name_and_ext__" directory="cell_assign_plots" ext="png" /> + <filter>runmode['selected_mode'] == "assign_cells" and len(runmode['plot_cells']) != 0</filter> + </collection> + </outputs> + <tests> + <test expect_num_outputs="1"> + <param name="anndata" value="celesta_image.h5ad" /> + <param name="prior_info" value="celesta_prior.csv" /> + <conditional name="runmode"> + <param name="selected_mode" value="plot_expression" /> + </conditional> + <output_collection name="marker_expression_plots" type="list" count="18"> + <element name="CD31_VASCULATURE_CYC_19_CH_3_exp_prob" file="CD31_VASCULATURE_CYC_19_CH_3_exp_prob.png" compare="sim_size" /> + </output_collection> + </test> + <test expect_num_outputs="1"> + <param name="anndata" value="celesta_image.h5ad" /> + <param name="prior_info" value="celesta_prior.csv" /> + <conditional name="runmode"> + <param name="selected_mode" value="assign_cells" /> + </conditional> + <output name="assign_cells_output"> + <assert_contents> + <has_h5_keys keys="obs/celesta_final_cell_type" /> + </assert_contents> + </output> + <assert_stdout> + <has_text text="vasculature 273" /> + </assert_stdout> + </test> + <test expect_num_outputs="3"> + <param name="anndata" value="celesta_image.h5ad" /> + <param name="prior_info" value="celesta_prior.csv" /> + <param name="filter" value="filter" /> + <conditional name="runmode"> + <param name="selected_mode" value="assign_cells" /> + </conditional> + <param name="high_thresholds_file" value="celesta_high_exp_thresholds.csv" /> + <repeat name="plot_cells"> + <param name="cell_types" value="vasculature" /> + </repeat> + <param name="save_rds" value="true" /> + <output name="assign_cells_output"> + <assert_contents> + <has_h5_keys keys="obs/celesta_final_cell_type" /> + </assert_contents> + </output> + <output_collection name="cell_assign_plots" type="list" count="1"> + <element name="plot_cells_vasculature" file="plot_cells_vasculature.png" compare="sim_size" /> + </output_collection> + <output name="assign_cells_rds"> + <assert_contents> + <has_size value="1400000" delta="100000" /> + </assert_contents> + </output> + <assert_stdout> + <has_text text="vasculature 168" /> + </assert_stdout> + </test> + </tests> + <help> + <![CDATA[ +**What it does** + +CELESTA (CELl typE identification with SpaTiAl information) is an algorithm aiming to perform +automated cell type identification for multiplexed in situ imaging data. + +CELESTA makes use of both protein expressions and cell spatial neighborhood information +from segmented imaging data for the cell type identification. + +This Galaxy implementation of CELESTA has two run modes: + +**Both run modes share the following inputs** + +`Input Anndata` -- anndata h5ad file where cells are rows, with marker expression in adata.X and cell coordinates in adata.obs + +`Cell-type signature matrix` -- Comma-separated text file containing the following information and formatting: + +(1) The first column has to contain the cell types to be inferred + +(2) The second column has the lineage information for each cell type. The lineage information has three numbers + connected by “_” (underscore). The first number indicates round. Cell types with the same lineage level are + inferred at the same round. Increasing number indicates increasing cell-type resolution. For example, + immune cells -> CD3+ T cells –> CD4+ T cells. The third number is a number assigned to the cell type, + i.e, cell type number. The middle number tells the previous lineage cell type number for the current cell type. + For example, the middle number for CD3+ T cells is 5, because it is a subtype of immune cells which have cell + type number assigned to 5. + +(3) Starting from column three, each column is a protein marker. If the protein marker is known to be expressed + for that cell type, then it is denoted by “1”. If the protein marker is known to not express for a cell type, + then it is denoted by “0”. If the protein marker is irrelevant or uncertain to express for a cell type, + then it is denoted by “NA”. + +`Name of anndata.obs key containing cell or nucleus centroid X position` -- if using output from MCMICRO, this would be 'X_centroid' + +`Name of anndata.obs key containing cell or nucleus centroid Y position` -- if using output from MCMICRO, this would be 'Y_centroid' + +`Choose whether to filter cells` -- Boolean whether to filter out cells with extreme low or high marker intensity that fall outside of thresholds (`CELESTA::FilterCells()`) + +`Set the low threshold for filtering cells` -- high_marker_threshold param in `CELESTA::FilterCells()` + +`Set the high threshold for filtering cells` -- low_marker_threshold param in `CELESTA::FilterCells()` + +**Run modes** + +1. Plot expression probabilities for markers in the cell type signature matrix + + This run mode generates marker expression probability plots for every marker in the cell-type signature matrix. + + **Additional inputs** + + `Specify the point size for plotting cells` -- passed to `ggplot2::geom_point()` size param + + `Specify the height of the figure (inches)` -- passed to `ggplot2::ggsave()` height param + + `Specify the width of the figure (inches)` -- passed to `ggplot2::ggsave()` width param + + **Outputs** + + Collection of `.png` figures showing marker intensity probabilities as spatial scatter plots + +2. Run the cell type assignment + + **Additional inputs** + + `Define the maximum iterations allowed in the EM algorithm per round` -- passed to `CELESTA::AssignCells()` max_iteration param + + `Define an ending condition for the EM algorithm` -- passed to `CELESTA::AssignCells()` cell_change_threshold param + + `Provide a file mapping low/high anchor and index cell assignment thresholds to cell types` -- comma separated text file containing following information and formatting: + +(1) First column contains cell types to be inferred (same order as the cell type signature matrix) + Second column is named `anchor` and contains high or low thresholds for anchor cells + Third column is named `index` and contains high or low thresholds for index cells + +(2) In the `CELESTA::AssignCells()` function, it requires four vectors to define the high and low thresholds for each cell type. The length of the vector equals to the + total number of cell types defined in the cell-type signature matrix. We would suggest start with the default thresholds and modify them by comparing the results + with the original staining. The two vectors are required for defining the “high_expression_threshold”, one for anchor cells and one for index cells (non-anchor cells). + The thresholds define how much the marker expression probability is in order to be considered as expressed. + +(3) For the low thresholds, Normally 1 is assigned to this value unless there are a lot of doublets or co-staining in the data. The Low expression threshold default + values in general are robust, and thus we recommend testing the High expression threshold values. + +`Also save CELESTA object as RDS file` -- Boolean whether to output an RDS file in addition to the default h5ad output + +`Plot combinations of resulting cell type assignments` -- specify any combination of cell types from the cell type signature matrix to plot. This is a repeat element, and one plot will be generated per repitition. There are additional params to control plot aesthetic attributes + +**Outputs** + +`CELESTA assign cells output` -- The primary output, an h5ad file, with new columns containing cell type information. New columns are prepended with `celesta_` + +`CELESTA object RDS` -- optionally output CELESTA object as RDS for downstream analysis in R + +Optional collection of `.png` figures of spatial scatter plots color annotated by cell type assignment + +Visit github.com/plevritis-lab/CELESTA for full documentation + + ]]> + </help> + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/celesta_assign_cells.R Wed Aug 28 12:46:48 2024 +0000 @@ -0,0 +1,153 @@ +# --------------------------------------------------------------------------------- +# The main algorithim for CELESTA cell type assignment +# --------------------------------------------------------------------------------- + +suppressWarnings(suppressMessages(library(janitor))) +suppressWarnings(suppressMessages(library(optparse))) +suppressWarnings(suppressMessages(library(dplyr))) +suppressWarnings(suppressMessages(library(anndataR))) +suppressWarnings(suppressMessages(library(Rmixmod))) +suppressWarnings(suppressMessages(library(spdep))) +suppressWarnings(suppressMessages(library(ggplot2))) +suppressWarnings(suppressMessages(library(reshape2))) +suppressWarnings(suppressMessages(library(zeallot))) +suppressWarnings(suppressMessages(library(CELESTA))) + +### Define command line arguments +option_list <- list( + make_option(c("-i", "--imagingdata"), action = "store", default = NA, type = "character", + help = "Path to imaging data"), + make_option(c("-p", "--prior"), action = "store", default = NA, type = "character", + help = "Path to prior marker info file"), + make_option(c("-x", "--xcol"), action = "store", default = NA, type = "character", + help = "Name of column in adata.obs containing X coordinate"), + make_option(c("-y", "--ycol"), action = "store", default = NA, type = "character", + help = "Name of column in adata.obs containing Y coordinate"), + make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE, + help = "Boolean to filter cells or not (default: no filtering)"), + make_option(c("--highfilter"), action = "store", default = 0.9, type = "double", + help = "High marker threshold if filtering cells (default: 0.9)"), + make_option(c("--lowfilter"), action = "store", default = 0.4, type = "double", + help = "Low marker threshold if filtering cells (default: 0.4)"), + make_option(c("--maxiteration"), action = "store", default = 10, type = "integer", + help = "Maximum iterations allowed in the EM algorithm per round"), + make_option(c("--changethresh"), action = "store", default = 0.01, type = "double", + help = "Ending condition for the EM algorithm"), + make_option(c("--highexpthresh"), action = "store", default = "default_high_thresholds", type = "character", + help = "Path to file specifying high expression thresholds for anchor and index cells"), + make_option(c("--lowexpthresh"), action = "store", default = "default_low_thresholds", type = "character", + help = "Path to file specifying low expression thresholds for anchor and index cells") +) + +### Functions +anndata_to_celesta <- function(input_adata, x_col, y_col) { + + #' Function to convert anndata object to dataframe readable by CELESTA + #' Coordinates columns in adata.obs are renamed to "X" and "Y", and placed at beginning of dataframe + #' Marker intensities are concatted columnwise to the X and Y coords. cols: X,Y,Marker_1,...Marker N + + # initialize output as dataframe from adata.obs + celesta_input_dataframe <- data.frame(input_adata$obs) + + # subset to X and Y coordinates from obs only + celesta_input_dataframe <- celesta_input_dataframe %>% + dplyr::select({{x_col}}, {{y_col}}) + + # rename X,Y column names to what CELESTA wants + colnames(celesta_input_dataframe) <- c("X", "Y") + + # merge X,Y coords with marker intensities from adata.X + x_df <- data.frame(input_adata$X) + colnames(x_df) <- input_adata$var_names + celesta_input_dataframe <- cbind(celesta_input_dataframe, x_df) + + return(celesta_input_dataframe) +} + +### Main +# parse args +opt <- parse_args(OptionParser(option_list = option_list)) + +# read anndata, convert to celesta format +adata <- read_h5ad(opt$imagingdata) +celesta_input_df <- anndata_to_celesta(adata, x_col = opt$xcol, y_col = opt$ycol) + +# read prior marker info +prior <- read.csv(opt$prior, check.names = FALSE) + +# clean prior names and input dataframe names +prior <- janitor::clean_names(prior, case = "all_caps") +celesta_input_df <- janitor::clean_names(celesta_input_df, case = "all_caps") + +# instantiate celesta object +CelestaObj <- CreateCelestaObject( + project_title = "", + prior_marker_info = prior, + imaging_data_file = celesta_input_df +) + +# if filtering is specified, filter out cells outside high and low thresholds +if (opt$filter) { + print("filtering cells based on expression") + CelestaObj <- FilterCells(CelestaObj, + high_marker_threshold = opt$highfilter, + low_marker_threshold = opt$lowfilter) +} else { + print("Proceeding to cell type assignment without cell filtering") +} + +# check for non-default expression threshold files +if (opt$highexpthresh != "default_high_thresholds") { + # read high thresholds + print("Using custom high expression thresholds") + high_expression_thresholds <- read.csv(opt$highexpthresh) + hi_exp_thresh_anchor <- high_expression_thresholds$anchor + hi_exp_thresh_index <- high_expression_thresholds$index +} else { + print("Using default high expression thresholds -- this may need adjustment") + hi_exp_thresh_anchor <- rep(0.7, length = 50) + hi_exp_thresh_index <- rep(0.5, length = 50) +} + +if (opt$lowexpthresh != "default_low_thresholds") { + # read low thresholds + print("Using custom low expression thresholds") + low_expression_thresholds <- read.csv(opt$highexpthresh) + low_exp_thresh_anchor <- low_expression_thresholds$anchor + low_exp_thresh_index <- low_expression_thresholds$index +} else { + print("Using default low expression thresholds") + low_exp_thresh_anchor <- rep(0.9, length = 50) + low_exp_thresh_index <- rep(1, length = 50) +} + +# run cell type assignment +CelestaObj <- AssignCells(CelestaObj, + max_iteration = opt$maxiteration, + cell_change_threshold = opt$changethresh, + high_expression_threshold_anchor = hi_exp_thresh_anchor, + low_expression_threshold_anchor = low_exp_thresh_anchor, + high_expression_threshold_index = hi_exp_thresh_index, + low_expression_threshold_index = low_exp_thresh_index, + save_result = FALSE) + +# save object as an RDS file for cell type plotting +# for the time being, this is not exposed to Galaxy +saveRDS(CelestaObj, file = "celestaobj.rds") + +# rename celesta assignment columns so they are obvious in output anndata +celesta_assignments <- CelestaObj@final_cell_type_assignment +celesta_assignments <- janitor::clean_names(celesta_assignments) +colnames(celesta_assignments) <- paste0("celesta_", colnames(celesta_assignments)) + +# merge celesta assignments into anndata object +adata$obs <- cbind(adata$obs, celesta_assignments) + +# print cell type value_counts to standard output +print("----------------------------------------") +print("Final cell type counts") +print(adata$obs %>% dplyr::count(celesta_final_cell_type, sort = TRUE)) +print("----------------------------------------") + +# write output anndata file +write_h5ad(adata, "result.h5ad")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/celesta_plot_cells.R Wed Aug 28 12:46:48 2024 +0000 @@ -0,0 +1,76 @@ +# --------------------------------------------------------------------------------- +# Plot assigned cell type combinations with CELESTA +# --------------------------------------------------------------------------------- + +suppressWarnings(suppressMessages(library(janitor))) +suppressWarnings(suppressMessages(library(optparse))) +suppressWarnings(suppressMessages(library(dplyr))) +suppressWarnings(suppressMessages(library(anndataR))) +suppressWarnings(suppressMessages(library(Rmixmod))) +suppressWarnings(suppressMessages(library(spdep))) +suppressWarnings(suppressMessages(library(ggplot2))) +suppressWarnings(suppressMessages(library(reshape2))) +suppressWarnings(suppressMessages(library(zeallot))) +suppressWarnings(suppressMessages(library(CELESTA))) + +# define command line args +option_list <- list( + make_option(c("-r", "--rds"), action = "store", default = "celestaobj.rds", type = "character", + help = "Path to CelestaObj RDS"), + make_option(c("-p", "--prior"), action = "store", default = NA, type = "character", + help = "Path to prior marker info file"), + make_option(c("-c", "--celltypes"), action = "store", default = NA, type = "character", + help = "Comma-separated list of cell types to plot"), + make_option(c("-s", "--size"), action = "store", default = 1, type = "double", + help = "Point size for plotting"), + make_option(c("--width"), action = "store", default = 12, type = "integer", + help = "Width of plot (inches)"), + make_option(c("--height"), action = "store", default = 12, type = "integer", + help = "Height of plot (inches)"), + make_option(c("--dpi"), action = "store", default = 300, type = "integer", + help = "DPI (dots per inch) of plot") +) + +# parse args +opt <- parse_args(OptionParser(option_list = option_list)) + +CelestaObj <- readRDS(opt$rds) +cell_types_to_plot <- strsplit(opt$celltypes, ",")[[1]] + +# read prior marker info +prior <- read.csv(opt$prior, row.names = 1) + +# get indices of cell types to plot from the prior marker table +cell_type_indices <- which(row.names(prior) %in% cell_types_to_plot) + +print(cell_types_to_plot) +print(cell_type_indices) + +print(row.names(prior)) + +# create output directory if it doesn"t already exist +dir.create("cell_assign_plots") + +# create the cell type plot +g <- PlotCellsAnyCombination(cell_type_assignment_to_plot = CelestaObj@final_cell_type_assignment[, (CelestaObj@total_rounds + 1)], + coords = CelestaObj@coords, + prior_info = prior_marker_info, + cell_number_to_use = cell_type_indices, + test_size = 1, + save_plot = FALSE) + +# create a unique output name for the plot based on the input cell types +cell_types_cleaned <- paste(make_clean_names(cell_types_to_plot), collapse = "") +output_name <- paste(c("plot_cells_", cell_types_cleaned, ".png"), collapse = "") + +# save to subdir +# FIXME: may want to expose plotting params to galaxy +ggsave( + path = "cell_assign_plots", + filename = output_name, + plot = g, + width = opt$width, + height = opt$height, + units = "in", + dpi = opt$dpi +)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/celesta_plot_expression.R Wed Aug 28 12:46:48 2024 +0000 @@ -0,0 +1,108 @@ +# -------------------------------------------------------------------------------------------- +# Plot marker expression probabilities for cell assignment parameter optimization with CELESTA +# -------------------------------------------------------------------------------------------- + +suppressWarnings(suppressMessages(library(janitor))) +suppressWarnings(suppressMessages(library(optparse))) +suppressWarnings(suppressMessages(library(dplyr))) +suppressWarnings(suppressMessages(library(anndataR))) +suppressWarnings(suppressMessages(library(Rmixmod))) +suppressWarnings(suppressMessages(library(spdep))) +suppressWarnings(suppressMessages(library(ggplot2))) +suppressWarnings(suppressMessages(library(reshape2))) +suppressWarnings(suppressMessages(library(zeallot))) +suppressWarnings(suppressMessages(library(CELESTA))) + +### Define command line arguments +option_list <- list( + make_option(c("-i", "--imagingdata"), action = "store", default = NA, type = "character", + help = "Path to imaging data"), + make_option(c("-p", "--prior"), action = "store", default = NA, type = "character", + help = "Path to prior marker info file"), + make_option(c("-x", "--xcol"), action = "store", default = NA, type = "character", + help = "Name of column in adata.obs containing X coordinate"), + make_option(c("-y", "--ycol"), action = "store", default = NA, type = "character", + help = "Name of column in adata.obs containing Y coordinate"), + make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE, + help = "Boolean to filter cells or not (default: no filtering)"), + make_option(c("--highfilter"), action = "store", default = 0.9, type = "double", + help = "High marker threshold if filtering cells (default: 0.9)"), + make_option(c("--lowfilter"), action = "store", default = 0.4, type = "double", + help = "Low marker threshold if filtering cells (default: 0.4)"), + make_option(c("-s", "--size"), action = "store", default = 1, type = "double", + help = "Point size for plotting"), + make_option(c("--width"), action = "store", default = 5, type = "integer", + help = "Width of plot (inches)"), + make_option(c("--height"), action = "store", default = 4, type = "integer", + help = "Height of plot (inches)") +) + +### Functions +anndata_to_celesta <- function(input_adata, x_col, y_col) { + + #' Function to convert anndata object to dataframe readable by CELESTA + #' Coordinates columns in adata.obs are renamed to "X" and "Y", and placed at beginning of dataframe + #' Marker intensities are concatted columnwise to the X and Y coords. cols: X,Y,Marker_1,...Marker N + + # initialize output as dataframe from adata.obs + celesta_input_dataframe <- data.frame(input_adata$obs) + + # subset to X and Y coordinates from obs only + celesta_input_dataframe <- celesta_input_dataframe %>% + dplyr::select({{x_col}}, {{y_col}}) + + # rename X,Y column names to what CELESTA wants + colnames(celesta_input_dataframe) <- c("X", "Y") + + # merge X,Y coords with marker intensities from adata.X + x_df <- data.frame(input_adata$X) + colnames(x_df) <- input_adata$var_names + celesta_input_dataframe <- cbind(celesta_input_dataframe, x_df) + + return(celesta_input_dataframe) +} + +### Main +# parse args +opt <- parse_args(OptionParser(option_list = option_list)) + +# read anndata, convert to celesta format +adata <- read_h5ad(opt$imagingdata) +celesta_input_df <- anndata_to_celesta(adata, x_col = opt$xcol, y_col = opt$ycol) + +# read prior marker info +prior <- read.csv(opt$prior, check.names = FALSE) + +# clean prior names, keeping a copy of originals for writing output +prior_original_names <- colnames(prior) +prior <- janitor::clean_names(prior, case = "all_caps") + +# clean input dataframe names +celesta_input_df <- janitor::clean_names(celesta_input_df, case = "all_caps") + +# instantiate celesta object +CelestaObj <- CreateCelestaObject( + project_title = "", + prior_marker_info = prior, + imaging_data_file = celesta_input_df +) + +# if filtering is specified, filter out cells outside high and low thresholds +if (opt$filter) { + print("filtering cells based on expression") + CelestaObj <- FilterCells(CelestaObj, + high_marker_threshold = opt$highfilter, + low_marker_threshold = opt$lowfilter) +} else { + print("Proceeding to marker expression plotting without cell filtering") +} + +# create output directory if it does not already exist +dir.create("marker_exp_plots") + +# plot expression probability +PlotExpProb(coords = CelestaObj@coords, + marker_exp_prob = CelestaObj@marker_exp_prob, + prior_marker_info = CelestaObj@prior_info, + save_plot = TRUE, + output_dir = "./marker_exp_plots")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Aug 28 12:46:48 2024 +0000 @@ -0,0 +1,39 @@ +<macros> + <token name="@TOOL_VERSION@">0.0.0.9</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">20.01</token> + + <xml name="celesta_requirements"> + <requirements> + <container type="docker">quay.io/goeckslab/celesta:@TOOL_VERSION@</container> + <yield /> + </requirements> + </xml> + + <xml name="macro_stdio"> + <stdio> + <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> + </stdio> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1038/s41592-022-01498-z</citation> + </citations> + </xml> + <xml name="celesta_base_options" token_label="celesta_base_options"> + <param name="x_coord" type="text" value="X_centroid" optional="false" label="Name of anndata.obs key containing cell or nucleus centroid X position" /> + <param name="y_coord" type="text" value="Y_centroid" optional="false" label="Name of anndata.obs key containing cell or nucleus centroid Y position" /> + <conditional name="filter_cells"> + <param name="filter" type="select" label="Choose whether to filter cells" help="FilterCells"> + <option value="no_filter" selected="true">Do not filter cells</option> + <option value="filter">Filter cells based on marker intensity</option> + </param> + <when value="no_filter"> + </when> + <when value="filter"> + <param name="low_threshold" type="float" value="0.4" optional="false" label="Set the low threshold for filtering cells" help="Cells below low threshold will be filtered out" /> + <param name="high_threshold" type="float" value="0.9" optional="false" label="Set the high threshold for filtering cells" help="Cells above high threshold will be filtered out" /> + </when> + </conditional> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/celesta_high_exp_thresholds.csv Wed Aug 28 12:46:48 2024 +0000 @@ -0,0 +1,17 @@ +,anchor,index +vasculature,0.9,0.85 +tumor cells,0,0 +aSMA+ stroma,0.8,0.8 +lymphatics,0.95,0.95 +immune cells,0.1,0 +CD3+ T cells,0.6,0.6 +CD15+ granulocytes,0.7,0.7 +B cells,0.95,0.95 +CD11c+ DCs,1,1 +CD68+CD163+ macrophages,0.7,0.6 +plasma cells,0.7,0.7 +NK cells,0.95,0.95 +CD8+ T cells,0.7,0.7 +CD4+ T cells,0.7,0.7 +CD4+ T cells CD45RO+,0.9,0.7 +Tregs,0.9,0.9 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/celesta_prior.csv Wed Aug 28 12:46:48 2024 +0000 @@ -0,0 +1,17 @@ +,Lineage_level,CD31 - vasculature:Cyc_19_ch_3,CD34 - vasculature:Cyc_20_ch_3,Cytokeratin - epithelia:Cyc_10_ch_2,aSMA - smooth muscle:Cyc_11_ch_2,Podoplanin - lymphatics:Cyc_19_ch_4,CD45 - hematopoietic cells:Cyc_4_ch_2,CD15 - granulocytes:Cyc_14_ch_2,CD3 - T cells:Cyc_16_ch_4,CD20 - B cells:Cyc_8_ch_3,CD11c - DCs:Cyc_12_ch_3,CD163 - macrophages:Cyc_17_ch_3,CD68 - macrophages:Cyc_18_ch_4,CD38 - multifunctional:Cyc_20_ch_4,CD56 - NK cells:Cyc_10_ch_4,CD8 - cytotoxic T cells:Cyc_3_ch_2,CD4 - T helper cells:Cyc_6_ch_3,CD45RO - memory cells:Cyc_18_ch_3,FOXP3 - regulatory T cells:Cyc_2_ch_3 +vasculature,1_0_1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +tumor cells,1_0_2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +aSMA+ stroma,1_0_3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +lymphatics,1_0_4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 +immune cells,1_0_5,0,0,0,0,0,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +CD3+ T cells,2_5_6,0,0,0,0,0,NA,0,1,0,0,0,0,0,0,NA,NA,NA,NA +CD15+ granulocytes,2_5_7,0,0,0,0,0,NA,1,0,0,0,0,NA,0,0,0,0,NA,0 +B cells,2_5_8,0,0,0,0,0,NA,0,0,1,0,0,0,0,0,0,0,0,0 +CD11c+ DCs,2_5_9,0,0,0,0,0,NA,0,0,0,1,0,0,0,0,0,0,0,0 +CD68+CD163+ macrophages,2_5_10,0,0,0,0,0,NA,0,0,0,0,1,1,0,0,0,0,NA,0 +plasma cells,2_5_11,0,0,0,0,0,NA,0,0,0,0,0,0,1,0,0,0,0,0 +NK cells,2_5_12,0,0,0,0,0,NA,0,NA,0,0,0,0,0,1,0,0,0,0 +CD8+ T cells,3_6_13,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,1,0,NA,0 +CD4+ T cells,3_6_14,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,1,NA,NA +CD4+ T cells CD45RO+,4_14_15,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,NA,1,0 +Tregs,4_14_16,0,0,0,0,0,NA,0,NA,0,0,0,0,0,0,0,NA,0,1 \ No newline at end of file