comparison FCStxtConvertSCE.R @ 0:4dc004880972 draft default tip

"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/develop/flowtools/convert_fcstxt_to_sce commit 611788da04fbda0b2735de1395d4407ecb75e068"
author azomics
date Thu, 22 Jul 2021 21:44:59 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4dc004880972
1 #!/usr/bin/env Rscript
2 # GECO flow text conversion tool
3 # Authors: Emily Combe and Pablo Moreno
4 #
5 # This tool converts a flowtext file (or tabular file) into a SingleCellExperiment object
6 # The tool was written by Emily Combe and edited by Pablo Moreno
7 #
8 # Options let you choose: the columns/markers to include in the assay, the columns to include in the metadata, descriptions of the markers, and an optional metadata file.
9 #
10 #
11 #
12 # Version 1
13 # July 2020 (Emily Combe / Pablo Moreno)
14
15
16 suppressPackageStartupMessages(library(SingleCellExperiment))
17 suppressPackageStartupMessages(library(optparse))
18
# Build a SingleCellExperiment object from a tab-separated flowtext file.
#
# Arguments:
#   input       Path to the tab-separated flowtext file (with a header row).
#   fl_cols     Column names to place in the assay. When empty, every column
#               is used except those whose name matches the scatter, time,
#               clustering/population or live/dead patterns below.
#   mtd_cols    Column names to treat as per-cell metadata. When empty,
#               columns whose name matches the sample / clustering patterns
#               below are used.
#   marker_type One entry per assay row; "l" and "f" are expanded to
#               "lineage" and "functional". Stored in rowData(...)$marker_type.
#   meta_data   Optional data frame merged onto the per-cell metadata through
#               the single column name both tables share (case-insensitive).
#
# Returns the SingleCellExperiment object with identical `counts` and
# `logcounts` assays.
#
# Terminates the R session with exit status 13-17 on invalid arguments:
#   13 more fl_cols than file columns      14 more mtd_cols than file columns
#   15 no shared metadata column           16 more than one shared column
#   17 marker_type length != number of assay rows
sce <- function(input, fl_cols = list(), mtd_cols = list(), marker_type = list(), meta_data = NULL) {

  #---------------------#
  # reading in flowtext #
  #---------------------#

  # NOTE(review): read.table() keeps its default check.names = TRUE here, so
  # header names are passed through make.names() before any matching below.
  flowtext <- read.table(input, sep = "\t", header = TRUE)
  all_cols <- colnames(flowtext)

  #----------------------------------#
  # extract-marker-fluorescence data #
  #----------------------------------#

  if (length(fl_cols) > 0) {
    if (length(fl_cols) > ncol(flowtext)) {
      quit(save = "no", status = 13, runLast = FALSE)
    }
    fl_cols_assay <- all_cols[all_cols %in% fl_cols]
  } else {
    # Use logical masks rather than negative indices: x[-integer(0)] is
    # empty, which used to drop every column when nothing matched.
    is_scatter <- grepl("FSC|SSC", all_cols)
    is_annotation <- grepl(
      "Time|Population|flowSOM|cluster|SOM|pop|Live_Dead|live|dead",
      all_cols,
      ignore.case = TRUE
    )
    # NOTE(review): a non-numeric column that escapes these patterns (for
    # example a sample-name column) ends up in the assay and coerces the
    # whole matrix to character - confirm whether it should be excluded too.
    fl_cols_assay <- all_cols[!(is_scatter | is_annotation)]
  }

  # transpose data into assay as columns = cells and rows = features.
  counts <- base::t(as.matrix(flowtext[, fl_cols_assay, drop = FALSE]))
  colnames(counts) <- seq_len(ncol(counts))

  #-------------------#
  # coldata/meta data #
  #-------------------#

  # By default any column with sample names or cluster results is extracted;
  # to override this the caller passes explicit column names (mtd_cols).
  if (length(mtd_cols) > 0) {
    if (length(mtd_cols) > ncol(flowtext)) {
      quit(save = "no", status = 14, runLast = FALSE)
    }
    mtd_cols_assay <- all_cols[all_cols %in% mtd_cols]
  } else {
    # Search the flowtext column names (not marker_type) for metadata columns.
    is_metadata <- grepl(
      "sample|population|flowsom|cluster|pop|som",
      all_cols,
      ignore.case = TRUE
    )
    mtd_cols_assay <- all_cols[is_metadata]
  }

  md <- flowtext[, mtd_cols_assay, drop = FALSE]

  # if metadata available will be merged with meta data from flow text
  if (!is.null(meta_data)) {

    # match column names case-insensitively
    md_col <- tolower(colnames(md))
    mtd_col <- tolower(colnames(meta_data))
    shared <- intersect(md_col, mtd_col)

    # quit unless exactly one column name matches
    if (length(shared) == 0) {
      quit(save = "no", status = 15, runLast = FALSE)
    }
    if (length(shared) > 1) {
      quit(save = "no", status = 16, runLast = FALSE)
    }

    # Give the key the same spelling in both tables so merge() really joins
    # on it; with names differing only in case merge() would otherwise find
    # no common column and return a Cartesian product.
    key <- colnames(md)[match(shared, md_col)]
    colnames(meta_data)[match(shared, mtd_col)] <- key

    # merge() does not preserve row order, so tag each cell with its position
    # and restore that order afterwards. all.x keeps exactly the cells of the
    # flowtext file and does not append metadata-only rows.
    md$.cell_order <- seq_len(nrow(md))
    merged <- merge(x = md, y = meta_data, by = key, all.x = TRUE, sort = FALSE)
    merged <- merged[order(merged$.cell_order), , drop = FALSE]
    merged$.cell_order <- NULL
    rownames(merged) <- NULL
    meta_data <- merged
  }

  # create Single Cell experiment object. SCOPE requires both counts and
  # logcounts assays - for FLOW both assays contain the same data
  sce_obj <- SingleCellExperiment(assays = list(counts = counts, logcounts = counts))

  # NOTE(review): the flowtext metadata columns (md) are attached only when a
  # meta_data table is supplied, and through colLabels<- rather than colData<-
  # - confirm that this is what downstream tools expect.
  if (!is.null(meta_data)) {
    colLabels(sce_obj) <- meta_data
  }

  #-----------------#
  # row/marker data #
  #-----------------#

  if (length(marker_type) > 0) {
    if (length(marker_type) != nrow(rowData(sce_obj))) {
      quit(save = "no", status = 17, runLast = FALSE)
    }
    # expand the one-letter codes accepted on the command line
    marker_type[marker_type == "l"] <- "lineage"
    marker_type[marker_type == "f"] <- "functional"

    rowData(sce_obj)$marker_type <- marker_type
  }
  return(sce_obj)
}
127
# Command-line interface of the tool. Every option is an optional character
# value that defaults to NA, so they are all built through one local helper.
option_list <- local({
  # Create a single "store a string, default NA" option.
  string_option <- function(flags, description) {
    make_option(
      flags,
      action = "store",
      default = NA,
      type = "character",
      help = description
    )
  }

  list(
    string_option(
      c("-i", "--input"),
      "File name for FCS txt file with sample information."
    ),
    string_option(
      c("-o", "--output"),
      "File name for output SCE R RDS Object."
    ),
    string_option(
      c("-f", "--fl_cols"),
      "Comma separated list of Columns with markers to be included in the Single Cell Experiment assay"
    ),
    string_option(
      c("-m", "--metadata_columns"),
      "Columns to be included in the metadata of the Single Cell Experiment."
    ),
    string_option(
      c("--metadata_file"),
      "Optional meta data txt file to include in Single Cell Experiment."
    ),
    string_option(
      c("--marker_type"),
      "Marker type"
    )
  )
})
172
# Script entry point: parse the command line, normalise the optional
# comma-separated arguments, build the SingleCellExperiment and save it.
opt <- parse_args(OptionParser(option_list = option_list))

# Fail early with a clear message instead of erroring inside read.table()
# or, for a missing output path, only after all the work has been done.
if (is.na(opt$input) || is.na(opt$output)) {
  stop("Both --input and --output must be provided.", call. = FALSE)
}

# Split a comma-separated option value into a character vector.
# Returns an empty list when the option was not given (NA), which sce()
# treats as "use the defaults". Quits with `exit_status` if any entry is NA.
split_option <- function(value, exit_status) {
  if (is.na(value)) {
    return(list())
  }
  entries <- as.character(strsplit(value, ",")[[1]])
  if (anyNA(entries)) {
    quit(save = "no", status = exit_status, runLast = FALSE)
  }
  entries
}

# fluorescence markers to include in the assay
fl_channels <- split_option(opt$fl_cols, exit_status = 10)

# meta data columns to go into colData in SCE
mt_channels <- split_option(opt$metadata_columns, exit_status = 11)

# metadata file to add to the coldata in SCE. Must have column matching the
# sample column in the flowtext file
md <- NULL
if (!is.na(opt$metadata_file)) {
  md <- read.table(opt$metadata_file, header = TRUE, sep = "\t", check.names = FALSE, as.is = FALSE)
}

# comma separated list of values to define the markers included in the assay
mark_type <- split_option(opt$marker_type, exit_status = 12)

# The result is deliberately stored under the name `sce`, as before, which
# replaces the function binding from this point on.
sce <- sce(input = opt$input, fl_cols = fl_channels, mtd_cols = mt_channels, meta_data = md, marker_type = mark_type)

saveRDS(sce, file = opt$output)