# HG changeset patch
# User azomics
# Date 1626990299 0
# Node ID 4dc0048809720e0ff42b282f90a663a389a18593
# "planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/develop/flowtools/convert_fcstxt_to_sce commit 611788da04fbda0b2735de1395d4407ecb75e068"
# diff -r 000000000000 -r 4dc004880972 FCStxtConvertSCE.R
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/FCStxtConvertSCE.R Thu Jul 22 21:44:59 2021 +0000
#!/usr/bin/env Rscript
# GECO flow text conversion tool
# Authors: Emily Combe and Pablo Moreno
#
# This tool converts a flowtext file (or tabular file) into a
# SingleCellExperiment object.
# The tool was written by Emily Combe and edited by Pablo Moreno.
#
# Options allow choosing: the columns/markers to include in the assay, the
# columns to include in the meta data, descriptions of the markers and a
# metadata file.
#
# Exit statuses used by this script:
#   10 / 11 / 12  invalid entry in --fl_cols / --metadata_columns /
#                 --marker_type
#   13 / 14       more assay / metadata columns requested than the input has
#   15 / 16       metadata file shares no column / more than one column with
#                 the metadata taken from the flowtext file
#   17            number of marker types differs from the number of markers
#
# Version 1
# July 2020 (Emily Combe / Pablo Moreno)

suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(optparse))

# Return the entries of `available` (column names of the parsed table) that
# were asked for in `requested`, in the order they appear in `available`.
# read.table() is called with its default check.names = TRUE, which rewrites
# names such as "FSC-A" to "FSC.A"; the make.names() form of every requested
# name is therefore accepted as well as its raw spelling.
select_requested_columns <- function(available, requested) {
  requested <- as.character(unlist(requested))
  available[available %in% c(requested, make.names(requested))]
}

# Build a SingleCellExperiment from a tab-separated flowtext file.
#
# Arguments:
#   input        path of the tab-separated flowtext file (with header row).
#   fl_cols      names of the columns to use as assay rows. When empty, every
#                numeric column is used except scatter (FSC/SSC), time,
#                sample, population/cluster and live/dead columns.
#   mtd_cols     names of the columns to store as per-cell metadata. When
#                empty, columns whose name looks like a sample or a
#                population/cluster column are used.
#   marker_type  one entry per assay row; "l" and "f" are expanded to
#                "lineage" and "functional". Stored in rowData()$marker_type.
#   meta_data    optional data frame with extra annotation. It must share
#                exactly one column name (compared case-insensitively) with
#                the selected metadata columns; that column is the join key.
#
# Returns a SingleCellExperiment with identical `counts` and `logcounts`
# assays (features in rows, cells in columns) and the metadata in colData().
# On invalid input the R session is terminated with one of the statuses
# 13-17 listed in the file header.
sce <- function(input, fl_cols = list(), mtd_cols = list(), marker_type = list(), meta_data = NULL) {

  #---------------------#
  # reading in flowtext #
  #---------------------#

  flowtext <- read.table(input, sep = "\t", header = TRUE)
  all_cols <- colnames(flowtext)

  #----------------------------------#
  # extract marker fluorescence data #
  #----------------------------------#

  if (length(fl_cols) > 0) {
    if (length(fl_cols) > ncol(flowtext)) {
      quit(save = "no", status = 13, runLast = FALSE)
    }
    fl_cols_assay <- select_requested_columns(all_cols, fl_cols)
  } else {
    # A logical mask is used rather than negative indices: x[-integer(0)]
    # selects nothing, so files without any excluded channel used to end up
    # with an empty assay.
    is_scatter <- grepl("FSC|SSC", all_cols)
    is_annotation <- grepl(
      "Time|Sample|Population|flowSOM|cluster|SOM|pop|Live_Dead|live|dead",
      all_cols,
      ignore.case = TRUE
    )
    # A single non-numeric column would turn the whole matrix into character.
    is_numeric <- unname(vapply(flowtext, is.numeric, logical(1)))
    fl_cols_assay <- all_cols[!is_scatter & !is_annotation & is_numeric]
  }

  # Assay layout: rows = features (markers), columns = cells.
  counts <- base::t(as.matrix(flowtext[, fl_cols_assay, drop = FALSE]))
  colnames(counts) <- as.character(seq_len(ncol(counts)))

  #-------------------#
  # coldata/meta data #
  #-------------------#

  # By default any column with sample names or cluster results is extracted;
  # to override this the user provides a list of column names (mtd_cols).
  if (length(mtd_cols) > 0) {
    if (length(mtd_cols) > ncol(flowtext)) {
      quit(save = "no", status = 14, runLast = FALSE)
    }
    mtd_cols_assay <- select_requested_columns(all_cols, mtd_cols)
  } else {
    is_metadata <- grepl(
      "sample|population|flowsom|cluster|pop|som",
      all_cols,
      ignore.case = TRUE
    )
    mtd_cols_assay <- all_cols[is_metadata]
  }

  cell_data <- flowtext[, mtd_cols_assay, drop = FALSE]

  # If a metadata table is available it is joined onto the per-cell metadata.
  if (!is.null(meta_data)) {
    shared <- intersect(
      tolower(colnames(cell_data)),
      tolower(colnames(meta_data))
    )

    # quit if < 1 or > 1 column names match
    if (length(shared) == 0) {
      quit(save = "no", status = 15, runLast = FALSE)
    }
    if (length(shared) > 1) {
      quit(save = "no", status = 16, runLast = FALSE)
    }

    # Resolve the real (case-preserved) key name on each side.
    key_cells <- colnames(cell_data)[match(shared, tolower(colnames(cell_data)))]
    key_meta <- colnames(meta_data)[match(shared, tolower(colnames(meta_data)))]

    # Look-up join: exactly one output row per cell, in the original cell
    # order. Cells without a match get NA; unmatched metadata rows are not
    # added.
    hit <- match(
      as.character(cell_data[[key_cells]]),
      as.character(meta_data[[key_meta]])
    )
    extra <- meta_data[hit, colnames(meta_data) != key_meta, drop = FALSE]
    rownames(extra) <- NULL
    cell_data <- cbind(cell_data, extra)
  }

  # Create the SingleCellExperiment object. SCOPE requires both counts and
  # logcounts assays - for FLOW both assays contain the same data.
  assay_list <- list(counts = counts, logcounts = counts)
  if (ncol(cell_data) > 0) {
    experiment <- SingleCellExperiment(
      assays = assay_list,
      colData = S4Vectors::DataFrame(
        cell_data,
        row.names = colnames(counts),
        check.names = FALSE
      )
    )
  } else {
    experiment <- SingleCellExperiment(assays = assay_list)
  }

  #-----------------#
  # row/marker data #
  #-----------------#

  if (length(marker_type) > 0) {
    if (length(marker_type) != nrow(experiment)) {
      quit(save = "no", status = 17, runLast = FALSE)
    }
    marker_type <- as.character(unlist(marker_type))
    marker_type[marker_type == "l"] <- "lineage"
    marker_type[marker_type == "f"] <- "functional"

    rowData(experiment)$marker_type <- marker_type
  }

  experiment
}

# Split a comma separated command line value into a trimmed character vector.
# Returns character(0) when the option was not supplied (NA). Terminates the
# session with `status` if any resulting entry is NA.
parse_csv_option <- function(value, status) {
  if (is.na(value)) {
    return(character(0))
  }
  items <- trimws(strsplit(value, ",", fixed = TRUE)[[1]])
  if (anyNA(items)) {
    quit(save = "no", status = status, runLast = FALSE)
  }
  items
}

option_list <- list(
  make_option(
    c("-i", "--input"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name for FCS txt file with sample information."
  ),
  make_option(
    c("-o", "--output"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name for output SCE R RDS Object."
  ),
  make_option(
    c("-f", "--fl_cols"),
    action = "store",
    default = NA,
    type = "character",
    help = "Comma separated list of Columns with markers to be included in the Single Cell Experiment assay"
  ),
  make_option(
    c("-m", "--metadata_columns"),
    action = "store",
    default = NA,
    type = "character",
    help = "Columns to be included in the metadata of the Single Cell Experiment."
  ),
  make_option(
    c("--metadata_file"),
    action = "store",
    default = NA,
    type = "character",
    help = "Optional meta data txt file to include in Single Cell Experiment."
  ),
  make_option(
    c("--marker_type"),
    action = "store",
    default = NA,
    type = "character",
    help = "Marker type"
  )
)

opt <- parse_args(OptionParser(option_list = option_list))

if (is.na(opt$input) || is.na(opt$output)) {
  stop("Both --input and --output must be supplied.", call. = FALSE)
}

# fluorescence markers to include in the assay
fl_channels <- parse_csv_option(opt$fl_cols, status = 10)

# meta data columns to go into colData in SCE
mt_channels <- parse_csv_option(opt$metadata_columns, status = 11)

# comma separated list of values to define the markers included in the assay
mark_type <- parse_csv_option(opt$marker_type, status = 12)

# metadata file to add to the colData in SCE. Must have a column matching the
# sample column in the flowtext file
md <- NULL
if (!is.na(opt$metadata_file)) {
  md <- read.table(
    opt$metadata_file,
    header = TRUE,
    sep = "\t",
    check.names = FALSE,
    as.is = FALSE
  )
}

result <- sce(
  input = opt$input,
  fl_cols = fl_channels,
  mtd_cols = mt_channels,
  meta_data = md,
  marker_type = mark_type
)

saveRDS(result, file = opt$output)

# ---------------------------------------------------------------------------
# Next file in the changeset (Galaxy tool wrapper; markup not recoverable):
# diff -r 000000000000 -r 4dc004880972 convertFCStxtToSCE.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/convertFCStxtToSCE.xml Thu Jul 22 21:44:59 2021 +0000
# @@ -0,0 +1,128 @@
# Single Cell Experiment
# bioconductor-singlecellexperiment
# r-optparse
SingleCellExperiment + * tabular file -> SingleCellExperiment + +The output of this tool is a Single Cell Experiment object. This is an R object which contains slots for fluorescence data (assay) and metaData (colData). Please see the image below. +Converting to a SCE object will allow you to run tools for single cell RNAseq on your cytometry data. + +For more information on Single Cell Experiment objects please refer to the picture below or the 'bioconductor page'. + +----- + +**Example** + +*Input*: flow text file + + FSC SSC Marker1 Marker2 Marker3 Marker4 Population Sample + 449 157 551 129 169 292 1 sample1 + 894 1023 199 277 320 227 3 sample1 + 262 73 437 69 0 146 6 sample1 + 340 115 509 268 0 74 1 sample1 + ... ... ... ... ... ... ... ... + 523 354 554 176 213 185 2 sample2 + 678 8096 98 74 417 267 6 sample2 + 226 89 467 123 0 324 2 sample2 + 660 175 589 178 1 89 7 sample2 + ... ... ... ... ... ... ... ... + +*Input*: Metadata Table + Sample Patient ID Sex Age ... + sample1 7245 female 54 ... + sample2 1423 male 36 ... + ... ... ... ... ... + +*Output*: Single Cell Experiment object + +.. 
image:: ./images/sce_modified.png + +]]> + + diff -r 000000000000 -r 4dc004880972 images/sce.png Binary file images/sce.png has changed diff -r 000000000000 -r 4dc004880972 images/sce_modified.png Binary file images/sce_modified.png has changed diff -r 000000000000 -r 4dc004880972 test-data/input.flowtext --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.flowtext Thu Jul 22 21:44:59 2021 +0000 @@ -0,0 +1,10 @@ +FSC SSC Marker1 Marker2 Marker3 Marker4 Population Sample +449 157 551 129 169 292 1 sample1 +894 1023 199 277 320 227 3 sample1 +262 73 437 69 0 146 6 sample1 +340 115 509 268 0 74 1 sample1 +523 354 554 176 213 185 2 sample2 +678 8096 98 74 417 267 6 sample2 +226 89 467 123 0 324 2 sample2 +660 175 589 178 1 89 7 sample2 + diff -r 000000000000 -r 4dc004880972 test-data/metadata.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/metadata.txt Thu Jul 22 21:44:59 2021 +0000 @@ -0,0 +1,3 @@ +Sample Patient_ID Sex Age +sample1 7245 female 54 +sample2 1423 male 26 diff -r 000000000000 -r 4dc004880972 test-data/output.rds Binary file test-data/output.rds has changed