Mercurial > repos > azomics > convert_fcstxt_to_sce
comparison FCStxtConvertSCE.R @ 0:4dc004880972 draft default tip
"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/develop/flowtools/convert_fcstxt_to_sce commit 611788da04fbda0b2735de1395d4407ecb75e068"
| author | azomics |
|---|---|
| date | Thu, 22 Jul 2021 21:44:59 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4dc004880972 |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 # GECO flow text conversion tool | |
| 3 # Authors: Emily Combe and Pablo Moreno | |
| 4 # | |
| 5 # This tool converts a flowtext file (or tabular file) into a SingleCellExperiment object | |
| 6 # The tool was written by Emily Combe and edited by Pablo Moreno | |
| 7 # | |
| 8 # The available options are: the columns/markers to include in the assay, the columns to include in the meta data, descriptions of the markers and a metadata file. | |
| 9 # | |
| 10 # | |
| 11 # | |
| 12 # Version 1 | |
| 13 # July 2020 (Emily Combe / Pablo Moreno) | |
| 14 | |
| 15 | |
| 16 suppressPackageStartupMessages(library(SingleCellExperiment)) | |
| 17 suppressPackageStartupMessages(library(optparse)) | |
| 18 | |
| 19 sce <- function(input, fl_cols = list(), mtd_cols = list(), marker_type = list(), meta_data = NULL) { | |
| 20 | |
| 21 | |
| 22 #---------------------# | |
| 23 # reading in flowtext # | |
| 24 #---------------------# | |
| 25 | |
| 26 flowtext <- read.table(input, sep = "\t", header = T) | |
| 27 | |
| 28 #----------------------------------# | |
| 29 # extract-marker-fluorescence data # | |
| 30 #----------------------------------# | |
| 31 | |
| 32 fl_cols_assay <- colnames(flowtext) | |
| 33 | |
| 34 if (length(fl_cols) > 0) { | |
| 35 | |
| 36 if (length(fl_cols) > ncol(flowtext)) { | |
| 37 quit(save = "no", status = 13, runLast = FALSE) | |
| 38 } | |
| 39 fl_cols_assay <- fl_cols_assay[fl_cols_assay %in% fl_cols] | |
| 40 } else { | |
| 41 channels_to_exclude <- c(grep(fl_cols_assay, pattern = "FSC"), | |
| 42 grep(fl_cols_assay, pattern = "SSC"), | |
| 43 grep(fl_cols_assay, pattern = "FSC-A"), | |
| 44 grep(fl_cols_assay, pattern = "SSC-A"), | |
| 45 grep(fl_cols_assay, pattern = "FSC-W"), | |
| 46 grep(fl_cols_assay, pattern = "SSC-W"), | |
| 47 grep(fl_cols_assay, pattern = "FSC-H"), | |
| 48 grep(fl_cols_assay, pattern = "SSC-H"), | |
| 49 grep(fl_cols_assay, pattern = "Time", ignore.case = T), | |
| 50 grep(fl_cols_assay, pattern = "Population|flowSOM|cluster|SOM|pop|cluster", ignore.case = T), | |
| 51 grep(fl_cols_assay, pattern = "Live_Dead|live|dead", ignore.case = T)) | |
| 52 | |
| 53 fl_cols_assay <- fl_cols_assay[-channels_to_exclude] | |
| 54 } | |
| 55 counts <- flowtext[, fl_cols_assay, drop = FALSE] | |
| 56 counts <- as.matrix(counts) | |
| 57 | |
| 58 # transpose data into assay as columns = cells and rows = features. | |
| 59 counts <- base::t(counts) | |
| 60 colnames(counts) <- seq_len(ncol(counts)) | |
| 61 | |
| 62 | |
| 63 #-----------------# | |
| 64 #coldata/meta data# | |
| 65 #-----------------# | |
| 66 | |
| 67 # by default any columns with sample names or cluster results will be extracted - to override this the user must provide a comma separated list of column names (mtd_cols) | |
| 68 mtd_cols_assay <- colnames(flowtext) | |
| 69 if (length(mtd_cols) > 0) { | |
| 70 if (length(mtd_cols) > ncol(flowtext)) { | |
| 71 quit(save = "no", status = 14, runLast = FALSE) | |
| 72 } | |
| 73 mtd_cols_assay <- mtd_cols_assay[mtd_cols_assay %in% mtd_cols] | |
| 74 } else { | |
| 75 | |
| 76 #create warning here to the user - but without failing | |
| 77 mtd_columns <- c(grep(marker_type, pattern = "sample", ignore.case = T), | |
| 78 grep(marker_type, pattern = "population|flowsom|cluster|pop|som", ignore.case = T)) | |
| 79 | |
| 80 mtd_cols_assay <- mtd_cols_assay[mtd_columns] | |
| 81 } | |
| 82 | |
| 83 md <- flowtext[, mtd_cols_assay, drop = FALSE] | |
| 84 | |
| 85 # if a metadata file is available, it will be merged with the meta data from the flowtext | |
| 86 if (!is.null(meta_data)) { | |
| 87 | |
| 88 #match column names so case insensitive | |
| 89 md_col <- tolower(colnames(md)) | |
| 90 mtd_col <- tolower(colnames(meta_data)) | |
| 91 | |
| 92 #quit unless exactly one column name matches (exit status 15 for no match, 16 for more than one) | |
| 93 if (length(intersect(md_col, mtd_col)) == 0) { | |
| 94 quit(save = "no", status = 15, runLast = FALSE) | |
| 95 } | |
| 96 if (length(intersect(md_col, mtd_col)) > 1) { | |
| 97 quit(save = "no", status = 16, runLast = FALSE) | |
| 98 } | |
| 99 | |
| 100 #merge by matched column | |
| 101 meta_data <- merge(x = md, y = meta_data, all = T) | |
| 102 | |
| 103 } | |
| 104 | |
| 105 #create Single Cell experiment object. SCOPE requires both counts and logcounts assays - for FLOW both assays contain the same data | |
| 106 sce <- SingleCellExperiment(assays = list(counts = counts, logcounts = counts)) | |
| 107 if (!is.null(meta_data)) { | |
| 108 colLabels(sce) <- meta_data | |
| 109 } | |
| 110 | |
| 111 | |
| 112 #-----------------# | |
| 113 # row/marker data # | |
| 114 #-----------------# | |
| 115 | |
| 116 if (length(marker_type) > 0) { | |
| 117 if (length(marker_type) != nrow(rowData(sce))) { | |
| 118 quit(save = "no", status = 17, runLast = FALSE) | |
| 119 } | |
| 120 marker_type[marker_type == "l"] <- "lineage" | |
| 121 marker_type[marker_type == "f"] <- "functional" | |
| 122 | |
| 123 rowData(sce)$marker_type <- marker_type | |
| 124 } | |
| 125 return(sce) | |
| 126 } | |
| 127 | |
| 128 option_list <- list( | |
| 129 make_option( | |
| 130 c("-i", "--input"), | |
| 131 action = "store", | |
| 132 default = NA, | |
| 133 type = "character", | |
| 134 help = "File name for FCS txt file with sample information." | |
| 135 ), | |
| 136 make_option( | |
| 137 c("-o", "--output"), | |
| 138 action = "store", | |
| 139 default = NA, | |
| 140 type = "character", | |
| 141 help = "File name for output SCE R RDS Object." | |
| 142 ), | |
| 143 make_option( | |
| 144 c("-f", "--fl_cols"), | |
| 145 action = "store", | |
| 146 default = NA, | |
| 147 type = "character", | |
| 148 help = "Comma separated list of Columns with markers to be included in the Single Cell Experiment assay" | |
| 149 ), | |
| 150 make_option( | |
| 151 c("-m", "--metadata_columns"), | |
| 152 action = "store", | |
| 153 default = NA, | |
| 154 type = "character", | |
| 155 help = "Columns to be included in the metadata of the Single Cell Experiment." | |
| 156 ), | |
| 157 make_option( | |
| 158 c("--metadata_file"), | |
| 159 action = "store", | |
| 160 default = NA, | |
| 161 type = "character", | |
| 162 help = "Optional meta data txt file to include in Single Cell Experiment." | |
| 163 ), | |
| 164 make_option( | |
| 165 c("--marker_type"), | |
| 166 action = "store", | |
| 167 default = NA, | |
| 168 type = "character", | |
| 169 help = "Marker type" | |
| 170 ) | |
| 171 ) | |
| 172 | |
| 173 opt <- parse_args(OptionParser(option_list = option_list)) | |
| 174 | |
| 175 # fluorescence markers to include in the assay | |
| 176 fl_channels <- list() | |
| 177 if (is.na(opt$fl_cols)) { | |
| 178 flag_default <- TRUE | |
| 179 } else { | |
| 180 fl_channels <- as.character(strsplit(opt$fl_cols, ",")[[1]]) | |
| 181 for (channel in fl_channels) { | |
| 182 if (is.na(channel)) { | |
| 183 quit(save = "no", status = 10, runLast = FALSE) | |
| 184 } | |
| 185 } | |
| 186 } | |
| 187 | |
| 188 # meta data columns to go into colData in SCE | |
| 189 mt_channels <- list() | |
| 190 if (is.na(opt$metadata_columns)) { | |
| 191 flag_default <- TRUE | |
| 192 } else { | |
| 193 mt_channels <- as.character(strsplit(opt$metadata_columns, ",")[[1]]) | |
| 194 for (channel in mt_channels) { | |
| 195 if (is.na(channel)) { | |
| 196 quit(save = "no", status = 11, runLast = FALSE) | |
| 197 } | |
| 198 } | |
| 199 } | |
| 200 | |
| 201 | |
| 202 #metadata file to add to the coldata in SCE. Must have a column matching the sample column in the flowtext file | |
| 203 md <- NULL | |
| 204 if (is.na(opt$metadata_file)) { | |
| 205 flag_default <- TRUE | |
| 206 } else { | |
| 207 md <- read.table(opt$metadata_file, header = TRUE, sep = "\t", check.names = FALSE, as.is = FALSE) | |
| 208 } | |
| 209 | |
| 210 #comma separated list of values to define the markers included in the assay | |
| 211 mark_type <- list() | |
| 212 if (is.na(opt$marker_type)) { | |
| 213 flag_default <- TRUE | |
| 214 } else { | |
| 215 mark_type <- as.character(strsplit(opt$marker_type, ",")[[1]]) | |
| 216 for (mt in mark_type) { | |
| 217 if (is.na(mt)) { | |
| 218 quit(save = "no", status = 12, runLast = FALSE) | |
| 219 } | |
| 220 } | |
| 221 } | |
| 222 | |
| 223 | |
| 224 sce <- sce(input = opt$input, fl_cols = fl_channels, mtd_cols = mt_channels, meta_data = md, marker_type = mark_type) | |
| 225 | |
| 226 saveRDS(sce, file = opt$output) |
