# view FCStxtConvertSCE.R @ 0:4dc004880972 draft default tip
#
# "planemo upload for repository commit 611788da04fbda0b2735de1395d4407ecb75e068"
# author azomics
# date Thu, 22 Jul 2021 21:44:59 +0000
# line wrap: on
# line source

#!/usr/bin/env Rscript
# GECO flow text conversion tool
# Authors: Emily Combe and Pablo Moreno
# This tool converts a flowtext file (or tabular file) into a SingleCellExperiment object
# The tool was written by Emily Combe and edited by Pablo Moreno
# There are the options to choose: the columns/markers to include in the assay, the columns to include in the meta data, descriptions of the markers and a metadata file.
# Version 1
# July 2020 (Emily Combe / Pablo Moreno)

# Start-up messages are suppressed so that nothing is written to stderr on load.
suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(optparse))

# Build a SingleCellExperiment object from a tab-separated flowtext file.
#
# Arguments:
#   input        Path to the tab-separated flowtext file (first row = header).
#   fl_cols      Names of the columns to use as assay features. When empty,
#                every column is used except scatter (FSC/SSC), time,
#                population/cluster and live/dead columns.
#   mtd_cols     Names of the columns to extract as per-cell metadata. When
#                empty, columns whose names look like sample or cluster labels
#                are used.
#   marker_type  One entry per assay feature; "l" is expanded to "lineage" and
#                "f" to "functional". Ignored when empty.
#   meta_data    Optional data frame that is merged with the per-cell metadata
#                on the single column name both tables share.
#
# Returns the SingleCellExperiment object. The "counts" and "logcounts" assays
# hold the same matrix (rows = features, columns = cells).
#
# Invalid input terminates the R session through quit() with an exit status:
#   13  more fl_cols than columns in the flowtext file
#   14  more mtd_cols than columns in the flowtext file
#   15  meta_data shares no column name with the extracted metadata
#   16  meta_data shares more than one column name with the extracted metadata
#   17  marker_type length differs from the number of assay features
sce <- function(input, fl_cols = list(), mtd_cols = list(), marker_type = list(), meta_data = NULL) {

    # reading in flowtext #

    flowtext <- read.table(input, sep = "\t", header = TRUE)
    all_cols <- colnames(flowtext)

    # extract-marker-fluorescence data #

    if (length(fl_cols) > 0) {
        if (length(fl_cols) > ncol(flowtext)) {
            quit(save = "no", status = 13, runLast = FALSE)
        }
        fl_cols_assay <- all_cols[all_cols %in% fl_cols]
    } else {
        # A logical mask is used instead of negative indices: x[-integer(0)]
        # is empty, so "nothing to exclude" would otherwise drop every column.
        # NOTE(review): the second pattern group is assumed to be matched
        # case-insensitively (ignore.case) - confirm.
        is_excluded <- grepl("FSC|SSC", all_cols) |
            grepl("Time|Population|flowSOM|cluster|SOM|pop|Live_Dead|live|dead",
                  all_cols, ignore.case = TRUE)
        fl_cols_assay <- all_cols[!is_excluded]
    }

    counts <- as.matrix(flowtext[, fl_cols_assay, drop = FALSE])

    # transpose data into assay as columns = cells and rows = features.
    counts <- base::t(counts)
    colnames(counts) <- seq_len(ncol(counts))

    # coldata/meta data #

    # by default any columns with sample names or cluster results will be
    # extracted - to override this the user must provide a comma separated
    # list of column names (mtd_cols)
    if (length(mtd_cols) > 0) {
        if (length(mtd_cols) > ncol(flowtext)) {
            quit(save = "no", status = 14, runLast = FALSE)
        }
        mtd_cols_assay <- all_cols[all_cols %in% mtd_cols]
    } else {
        # The search runs over the flowtext column names (not marker_type),
        # because the matches are used to select flowtext columns.
        is_metadata <- grepl("sample|population|flowsom|cluster|pop|som",
                             all_cols, ignore.case = TRUE)
        mtd_cols_assay <- all_cols[is_metadata]
    }

    md <- flowtext[, mtd_cols_assay, drop = FALSE]

    # if metadata available will be merged with meta data from flow text
    if (!is.null(meta_data)) {

        # match column names case-insensitively
        md_col <- tolower(colnames(md))
        mtd_col <- tolower(colnames(meta_data))
        shared <- intersect(md_col, mtd_col)

        # quit if < 1 or > 1 column names match
        if (length(shared) == 0) {
            quit(save = "no", status = 15, runLast = FALSE)
        }
        if (length(shared) > 1) {
            quit(save = "no", status = 16, runLast = FALSE)
        }

        # merge() matches names exactly, so give the key column of meta_data
        # the spelling used in the flowtext before merging on it.
        key <- colnames(md)[md_col == shared][1]
        colnames(meta_data)[which(mtd_col == shared)[1]] <- key

        # NOTE(review): merge() sorts on the key by default and all = TRUE can
        # add rows, so the result may not be in cell order - confirm that this
        # is what downstream tools expect.
        meta_data <- merge(x = md, y = meta_data, by = key, all = TRUE)
    }

    # create Single Cell experiment object. SCOPE requires both counts and
    # logcounts assays - for FLOW both assays contain the same data
    sce_obj <- SingleCellExperiment(assays = list(counts = counts, logcounts = counts))
    if (!is.null(meta_data)) {
        # NOTE(review): the merged table is stored through colLabels<- as in
        # the original code - confirm this rather than colData<- is intended.
        colLabels(sce_obj) <- meta_data
    }

    # row/marker data #

    if (length(marker_type) > 0) {
        if (length(marker_type) != nrow(rowData(sce_obj))) {
            quit(save = "no", status = 17, runLast = FALSE)
        }
        marker_type[marker_type == "l"] <- "lineage"
        marker_type[marker_type == "f"] <- "functional"

        rowData(sce_obj)$marker_type <- marker_type
    }

    sce_obj
}

# Command line interface. Every option is an optional character value that
# defaults to NA, which the code below treats as "not supplied".
option_list <- list(
    make_option(
        c("-i", "--input"),
        action = "store",
        default = NA,
        type = "character",
        help = "File name for FCS txt file with sample information."
    ),
    make_option(
        c("-o", "--output"),
        action = "store",
        default = NA,
        type = "character",
        help = "File name for output SCE R RDS Object."
    ),
    make_option(
        c("-f", "--fl_cols"),
        action = "store",
        default = NA,
        type = "character",
        help = "Comma separated list of Columns with markers to be included in the Single Cell Experiment assay"
    ),
    make_option(
        c("-m", "--metadata_columns"),
        action = "store",
        default = NA,
        type = "character",
        help = "Columns to be included in the metadata of the Single Cell Experiment."
    ),
    # The long flag names of the next two options follow the fields read
    # later in the script (opt$metadata_file and opt$marker_type).
    make_option(
        c("--metadata_file"),
        action = "store",
        default = NA,
        type = "character",
        help = "Optional meta data txt file to include in Single Cell Experiment."
    ),
    make_option(
        c("--marker_type"),
        action = "store",
        default = NA,
        type = "character",
        help = "Marker type"
    )
)

opt <- parse_args(OptionParser(option_list = option_list))

# Split a comma separated option value into a character vector.
#
# Returns an empty list when the option was not supplied (NULL or NA), which
# sce() treats as "use the defaults". Quits with `exit_status` when any of the
# resulting entries is NA.
# NOTE(review): the per-entry check is assumed to be an NA test - confirm.
split_csv_option <- function(value, exit_status) {
    if (is.null(value) || is.na(value)) {
        return(list())
    }
    entries <- strsplit(value, ",")[[1]]
    if (anyNA(entries)) {
        quit(save = "no", status = exit_status, runLast = FALSE)
    }
    entries
}

# fluorescence markers to include in the assay
fl_channels <- split_csv_option(opt$fl_cols, exit_status = 10)

# meta data columns to go into colData in SCE
mt_channels <- split_csv_option(opt$metadata_columns, exit_status = 11)

# metadata file to add to the coldata in SCE. Must have column matching the
# sample column in the flowtext file
md <- NULL
if (!is.null(opt$metadata_file) && !is.na(opt$metadata_file)) {
    # NOTE(review): the last argument is assumed to be as.is = FALSE - confirm.
    md <- read.table(opt$metadata_file, header = TRUE, sep = "\t", check.names = FALSE, as.is = FALSE)
}

# comma separated list of values to define the markers included in the assay
mark_type <- split_csv_option(opt$marker_type, exit_status = 12)

sce <- sce(input = opt$input, fl_cols = fl_channels, mtd_cols = mt_channels, meta_data = md, marker_type = mark_type)

saveRDS(sce, file = opt$output)