Mercurial > repos > goeckslab > celesta
view celesta_assign_cells.R @ 1:44d4c885d9b5 draft
planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/celesta commit 1f02a762ab43a29aa7f59760798ff084f86e37db
author | goeckslab |
---|---|
date | Thu, 12 Sep 2024 16:49:51 +0000 |
parents | 8001319743c0 |
children |
line wrap: on
line source
# --------------------------------------------------------------------------------- # The main algorithim for CELESTA cell type assignment # --------------------------------------------------------------------------------- suppressWarnings(suppressMessages(library(janitor))) suppressWarnings(suppressMessages(library(optparse))) suppressWarnings(suppressMessages(library(dplyr))) suppressWarnings(suppressMessages(library(anndataR))) suppressWarnings(suppressMessages(library(Rmixmod))) suppressWarnings(suppressMessages(library(spdep))) suppressWarnings(suppressMessages(library(ggplot2))) suppressWarnings(suppressMessages(library(reshape2))) suppressWarnings(suppressMessages(library(zeallot))) suppressWarnings(suppressMessages(library(CELESTA))) ### Define command line arguments option_list <- list( make_option(c("-i", "--imagingdata"), action = "store", default = NA, type = "character", help = "Path to imaging data"), make_option(c("-p", "--prior"), action = "store", default = NA, type = "character", help = "Path to prior marker info file"), make_option(c("-x", "--xcol"), action = "store", default = NA, type = "character", help = "Name of column in adata.obs containing X coordinate"), make_option(c("-y", "--ycol"), action = "store", default = NA, type = "character", help = "Name of column in adata.obs containing Y coordinate"), make_option(c("--filter"), action = "store_true", type = "logical", default = FALSE, help = "Boolean to filter cells or not (default: no filtering)"), make_option(c("--highfilter"), action = "store", default = 0.9, type = "double", help = "High marker threshold if filtering cells (default: 0.9)"), make_option(c("--lowfilter"), action = "store", default = 0.4, type = "double", help = "Low marker threshold if filtering cells (default: 0.4)"), make_option(c("--maxiteration"), action = "store", default = 10, type = "integer", help = "Maximum iterations allowed in the EM algorithm per round"), make_option(c("--changethresh"), action = "store", default = 0.01, type = "double", help = "Ending condition for the EM algorithm"), make_option(c("--highexpthresh"), action = "store", default = "default_high_thresholds", type = "character", help = "Path to file specifying high expression thresholds for anchor and index cells"), make_option(c("--lowexpthresh"), action = "store", default = "default_low_thresholds", type = "character", help = "Path to file specifying low expression thresholds for anchor and index cells") ) ### Functions anndata_to_celesta <- function(input_adata, x_col, y_col) { #' Function to convert anndata object to dataframe readable by CELESTA #' Coordinates columns in adata.obs are renamed to "X" and "Y", and placed at beginning of dataframe #' Marker intensities are concatted columnwise to the X and Y coords. cols: X,Y,Marker_1,...Marker N # initialize output as dataframe from adata.obs celesta_input_dataframe <- data.frame(input_adata$obs) # subset to X and Y coordinates from obs only celesta_input_dataframe <- celesta_input_dataframe %>% dplyr::select({{x_col}}, {{y_col}}) # rename X,Y column names to what CELESTA wants colnames(celesta_input_dataframe) <- c("X", "Y") # merge X,Y coords with marker intensities from adata.X x_df <- data.frame(input_adata$X) colnames(x_df) <- input_adata$var_names celesta_input_dataframe <- cbind(celesta_input_dataframe, x_df) return(celesta_input_dataframe) } ### Main # parse args opt <- parse_args(OptionParser(option_list = option_list)) # read anndata, convert to celesta format adata <- read_h5ad(opt$imagingdata) celesta_input_df <- anndata_to_celesta(adata, x_col = opt$xcol, y_col = opt$ycol) # read prior marker info prior <- read.csv(opt$prior, check.names = FALSE) # clean prior names and input dataframe names prior <- janitor::clean_names(prior, case = "all_caps") celesta_input_df <- janitor::clean_names(celesta_input_df, case = "all_caps") # instantiate celesta object CelestaObj <- CreateCelestaObject( project_title = "", prior_marker_info = prior, imaging_data_file = celesta_input_df ) # if filtering is specified, filter out cells outside high and low thresholds if (opt$filter) { print("filtering cells based on expression") CelestaObj <- FilterCells(CelestaObj, high_marker_threshold = opt$highfilter, low_marker_threshold = opt$lowfilter) } else { print("Proceeding to cell type assignment without cell filtering") } # check for non-default expression threshold files if (opt$highexpthresh != "default_high_thresholds") { # read high thresholds print("Using custom high expression thresholds") high_expression_thresholds <- read.csv(opt$highexpthresh) hi_exp_thresh_anchor <- high_expression_thresholds$anchor hi_exp_thresh_index <- high_expression_thresholds$index } else { print("Using default high expression thresholds -- this may need adjustment") hi_exp_thresh_anchor <- rep(0.7, length = 50) hi_exp_thresh_index <- rep(0.5, length = 50) } if (opt$lowexpthresh != "default_low_thresholds") { # read low thresholds print("Using custom low expression thresholds") low_expression_thresholds <- read.csv(opt$highexpthresh) low_exp_thresh_anchor <- low_expression_thresholds$anchor low_exp_thresh_index <- low_expression_thresholds$index } else { print("Using default low expression thresholds") low_exp_thresh_anchor <- rep(0.9, length = 50) low_exp_thresh_index <- rep(1, length = 50) } # run cell type assignment CelestaObj <- AssignCells(CelestaObj, max_iteration = opt$maxiteration, cell_change_threshold = opt$changethresh, high_expression_threshold_anchor = hi_exp_thresh_anchor, low_expression_threshold_anchor = low_exp_thresh_anchor, high_expression_threshold_index = hi_exp_thresh_index, low_expression_threshold_index = low_exp_thresh_index, save_result = FALSE) # save object as an RDS file for cell type plotting # for the time being, this is not exposed to Galaxy saveRDS(CelestaObj, file = "celestaobj.rds") # rename celesta assignment columns so they are obvious in output anndata celesta_assignments <- CelestaObj@final_cell_type_assignment celesta_assignments <- janitor::clean_names(celesta_assignments) colnames(celesta_assignments) <- paste0("celesta_", colnames(celesta_assignments)) # merge celesta assignments into anndata object adata$obs <- cbind(adata$obs, celesta_assignments) # print cell type value_counts to standard output print("----------------------------------------") print("Final cell type counts") print(adata$obs %>% dplyr::count(celesta_final_cell_type, sort = TRUE)) print("----------------------------------------") # write output anndata file write_h5ad(adata, "result.h5ad")