comparison FCSGateTrans.R @ 1:c28c2e680bf5 draft

"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/fcs_gate_trans commit f34ed6ca8e77b9792a270890262c2936b13e30b9"
author azomics
date Mon, 22 Jun 2020 20:30:34 -0400
parents
children
comparison
equal deleted inserted replaced
0:78b8ab344edd 1:c28c2e680bf5
1 #!/usr/bin/env Rscript
2 ######################################################################
3 # Copyright (c) 2016 Northrop Grumman.
4 # All rights reserved.
5 ######################################################################
6 # ImmPort FCS conversion program
7 # Authors: Yue Liu and Yu "Max" Qian
8 #
9 # Reference: FCSTrans: An open source software system for FCS
10 # file conversion and data transformation
11 # Qian Y, Liu Y, Campbell J, Thomson E, Kong YM,
12 # Scheuermann RH. 2012 Cytometry Part A. 81A(5)
13 # doi.org/10.1002/cyto.a.22037
14 #
15 # To run in R
16 # 1) library(flowCore)
17 # 2) source("FCSTrans.R")
18 # 3) transformFCS("filename")
19 #
20 #
21 # Automated Gating of Lymphocytes with FlowDensity
22 # Authors of FlowDensity: Jafar Taghiyar, Mehrnoush Malek
23 #
24 # Reference: flowDensity: reproducing manual gating of flow
25 # cytometry data by automated density-based cell
26 # population identification
27 # Malek M, Taghiyar MJ, Chong L, Finak G,
28 # Gottardo R, Brinkman RR. 2015 Bioinformatics 31(4)
29 # doi: 10.1093/bioinformatics/btu677
30 #
31 #
32 # Version 1.5
33 # March 2016 -- added lines to run directly from command line (cristel thomas)
34 # May 2016 -- added automated gating (cristel thomas)
35 # August 2016 -- added options for data transformation (cristel thomas)
36 # April 2017 -- added logicle to transformation options (cristel thomas)
37 # July 2017 -- added options for outputs (cristel thomas)
38
39 library(flowCore)
40 library(flowDensity)
41 library(GEOmap)
42 #
43 # Set output to 0 when input is less than cutoff value
44 #
45 ipfloor <- function (x, cutoff=0, target=0) {
46 y <- x
47 if (x <= cutoff) {
48 y <- target
49 }
50 return(y)
51 }
52 #
53 # Set output to 0 when input is less than cutoff value
54 #
55 ipceil <- function (x, cutoff=0, target=0) {
56 y <- x
57 if (x >= cutoff) {
58 y <- target
59 }
60 return(y)
61 }
62 #
63 # Calculation core of iplogicle
64 #
65 iplogicore <- function (x, w, r, d, scale) {
66 tol <- .Machine$double.eps^0.8
67 maxit <- as.integer(5000)
68 d <- d * log(10)
69 scale <- scale / d
70 p <- if (w == 0) {
71 1
72 } else {
73 uniroot(function(p) -w + 2 * p * log(p)/(p + 1), c(.Machine$double.eps,
74 2 * (w + d)))$root
75 }
76 a <- r * exp(-(d - w))
77 b <- 1
78 c <- r * exp(-(d - w)) * p^2
79 d <- 1/p
80 f <- a * (p^2 - 1)
81 y <- .Call("flowCore_biexponential_transform", PACKAGE= 'flowCore',
82 as.double(x), a, b, c, d, f, w, tol, maxit)
83 y <- sapply(y * scale, ipfloor)
84 return(y)
85 }
86 #
87 # Function for calculating w
88 #
89 iplogiclew <- function (w, cutoff=-111, r=262144, d=4.5, scale=1) {
90 if (w > d)
91 w <- d
92 y <- iplogicore(cutoff, w, r, d, scale) - .Machine$double.eps^0.6
93 return(y)
94 }
95 #
96 # ImmPort logicle function - convert fluorescent marker values to channel output
97 #
98 iplogicle <- function (x, r=262144, d=4.5, range=4096, cutoff=-111, w=-1) {
99 if (w > d) {
100 stop("Negative range decades must be smaller than total number of decades")
101 }
102 if (w < 0) {
103 w = uniroot(iplogiclew, c(0, d), cutoff=cutoff)$root
104 }
105 y <- iplogicore(x, w, r, d, range)
106 return(y)
107 }
108 #
109 # Convert fluorescent values to channel output using log transformation
110 #
111 iplog <- function(x) {
112 x <- sapply(x, ipfloor, cutoff=1, target=1)
113 y <- 1024 * log10(x) - 488.6
114 return(y)
115 }
116 #
117 # ImmPort linear function - convert scatter values to channel output
118 # linear transformation
119 #
120 ipscatter <- function (x, channelrange=262144) {
121 y <- 4095.0 * x / channelrange
122 y <- sapply(y, ipfloor)
123 y <- sapply(y, ipceil, cutoff=4095, target=4095)
124 return(y)
125 }
126 #
127 # ImmPort time function - convert time values to channel output
128 # linear transformation
129 iptime <- function (x, channelrange) {
130 # use simple cutoff for now
131 y <- sapply(x, ipfloor)
132 return(y)
133 }
134 #
135 # Determine the type of marker. Marker type is used
136 # to determine type of transformation to apply for this channel.
137 # Before 2010 FLUO_AREA type used iplogicile and
138 # FLOU_NON_AREA type used iplog. In 2010 Yue, changed code so
139 # all fluorescent channels use iplogicle. Below is the note from SVN
140 #
141 # Version 1.1
142 # 2010-07-02
143 # -----------
144 # Added data type checking on both FCS version 2 and 3
145 # Removed log conversion for non-area fluorescent channel
146 # Applied logicle conversion for all fluorescent channels
147 #
148 # The GenePattern version uses iplog for FLOU_NON_AREA, rather
149 # than iplogicle.
150 #
151 getMarkerType <- function(name,debug=FALSE) {
152 type <- ""
153 prefix2 <- toupper(substr(name, 1, 2))
154 prefix3 <- toupper(substr(name, 1, 3))
155 prefix4 <- toupper(substr(name, 1, 4))
156 if (prefix2 == "FS" || prefix2 == "SS") {
157 type <- "SCATTER"
158 } else if (prefix3 == "FSC" || prefix3 == "SSC") {
159 type <- "SCATTER"
160 } else if (prefix4 == "TIME") {
161 type <- "TIME"
162 } else {
163 pieces <- unlist(strsplit(name, "-"))
164 if (toupper(pieces[length(pieces)]) == "A") {
165 type <- "FLUO_AREA"
166 } else {
167 type <- "FLUO_NON_AREA"
168 }
169 }
170 if (debug) {
171 print(paste("Marker:", name, ", Type:", type))
172 }
173 return(type)
174 }
175 #
176 # Scale data
177 #
178 scaleData <- function(data, channelrange=0) {
179 datamax <- range(data)[2] # range() returns [min, max]
180 if (datamax > channelrange) {
181 channelrange <- datamax
182 }
183 #if (channelrange == 0) {
184 # channelrange = range(data)[2]
185 #}
186 data <- 262144 * data / channelrange
187 return(data)
188 }
189 #
190 # Check if AccuriData. Accuri data needs different conversion
191 #
192 isAccuriData <- function(keywords) {
193 isTRUE(as.character(keywords$"$CYT") == "Accuri C6")
194 }
195 #
196 # Convert FCS file
197 #
198 convertFCS <- function(fcs, debug=FALSE) {
199 # Check file type and FCS version
200 if (class(fcs)[1] != "flowFrame") {
201 print("convertFCS requires flowFrame object as input")
202 return(FALSE)
203 }
204 keywords <- keyword(fcs)
205 markers <- colnames(fcs)
206 params <- fcs@parameters
207 list_description <- fcs@description
208
209 if (debug) {
210 print("****Inside convertFCS")
211 print(paste(" FCS version:", keywords$FCSversion))
212 print(paste(" DATATYPE:", keywords['$DATATYPE']))
213 }
214 if (keywords$FCSversion == "2" || keywords$FCSversion == "3" ||
215 keywords$FCSversion == "3.1" ) {
216 datatype <- unlist(keywords['$DATATYPE'])
217 if (datatype == 'F') {
218 # Process fcs expression data, using transformation
219 # based on category of the marker.
220 fcs_exprs <- exprs(fcs)
221 fcs_channel <- NULL
222 for (i in 1:length(markers)){
223 markertype <- getMarkerType(markers[i], debug)
224 rangekeyword <- paste("$P", i, "R", sep="")
225 flowcore_min <- paste("flowCore_", rangekeyword, "min", sep="")
226 flowcore_max <- paste("flowCore_", rangekeyword, "max", sep="")
227 channelrange <- as.numeric(keywords[rangekeyword])
228 if (debug) {
229 print(paste(" Marker name:", markers[i]))
230 print(paste(" Marker type:", markertype))
231 print(paste(" Range value:", keywords[rangekeyword]))
232 }
233
234 if (markertype == "TIME") {
235 channel <- iptime(fcs_exprs[, i])
236 } else {
237 if (markertype == "SCATTER") {
238 channel <- ipscatter(scaleData(fcs_exprs[, i], channelrange))
239 } else {
240 # Apply logicle transformation on fluorescent channels
241 channel <- iplogicle(scaleData(fcs_exprs[, i], channelrange))
242 }
243 # adjust range in parameters and list description
244 if (params@data$range[i] > 4096){
245 params@data$range[i] <- 4096
246 params@data$minRange[i] <- 0
247 params@data$maxRange[i] <- 4096
248 list_description[rangekeyword] <- 4096
249 list_description[flowcore_min] <- 0
250 list_description[flowcore_max] <- 4096
251 }
252 }
253 fcs_channel <- cbind(fcs_channel, round(channel))
254 }
255 colnames(fcs_channel) <- markers
256 } else {
257 if (datatype != 'I') {
258 print(paste("Data type", datatype, "in FCS 3 is not supported"))
259 }
260 fcs_channel <- exprs(fcs)
261 colnames(fcs_channel) <- markers
262 }
263 } else {
264 print(paste("FCS version", keyword(fcs)$FCSversion, "is not supported"))
265 fcs_channel <- exprs(fcs)
266 colnames(fcs_channel) <- markers
267 }
268 newfcs <- flowFrame(fcs_channel, params, list_description)
269 return(newfcs)
270 }
271 #
272 # Starting function for processing a FCS file
273 #
274 processFCSFile <- function(input_file, output_file="", compensate=FALSE,
275 outformat="flowtext", gate=FALSE,
276 graph_file="", report="", method="",
277 scaling_factor, logicle_w=0.5, logicle_t=262144,
278 logicle_m=4.5, debug=FALSE) {
279 #
280 # Generate the file names for the output_file
281 #
282 pieces <- unlist(strsplit(input_file, .Platform$file.sep))
283 filename <- pieces[length(pieces)]
284 if (debug) {
285 print (paste("Converting file: ",input_file))
286 print (paste("Original file name: ",filename))
287 print (paste("Output file name: ",output_file))
288 }
289 fcs <- read.FCS(input_file, transformation=F)
290 keywords <- keyword(fcs)
291 markers <- colnames(fcs)
292 print_markers <- as.vector(pData(parameters(fcs))$desc)
293 # Update print_markers if the $P?S not in the FCS file
294 for (i in 1:length(print_markers)) {
295 if (is.na(print_markers[i])) {
296 print_markers[[i]] <- markers[i]
297 }
298 }
299 #
300 # Compensate
301 #
302 spill <- keywords$SPILL
303
304 if (is.null(spill) == FALSE && compensate == TRUE) {
305 if (debug) {
306 print("Attempting compensation")
307 }
308 tryCatch({fcs = compensate(fcs, spill)},
309 error = function(ex) {str(ex); })
310 }
311 #
312 # Transform the data
313 #
314 transformed_data <- fcs
315 channels_to_exclude <- c(grep(colnames(fcs), pattern="FSC"),
316 grep(colnames(fcs), pattern="SSC"),
317 grep(colnames(fcs), pattern="Time"))
318 list_channels <- colnames(fcs)[-channels_to_exclude]
319 if (isAccuriData(keywords)) {
320 print("Accuri data is not supported")
321 } else if (method != "None"){
322 if (method == "fcstrans"){
323 transformed_data <- convertFCS(fcs, debug)
324 } else if (method == "logicle_auto"){
325 lgcl <- estimateLogicle(fcs, channels = list_channels)
326 transformed_data <- transform(fcs, lgcl)
327 } else {
328 if (method == "arcsinh"){
329 trans <- arcsinhTransform(transformationId="defaultArcsinhTransform",
330 a = 0, b = scaling_factor, c = 0)
331 } else if (method == "logicle"){
332 trans <- logicleTransform(w = logicle_w, t = logicle_t, m = logicle_m)
333 }
334 translist <- transformList(list_channels, trans)
335 transformed_data <- transform(fcs, translist)
336 }
337 }
338 trans_gated_data <- transformed_data
339 #
340 # Gate data
341 #
342 if (gate){
343 # check that there are SSC and FSC channels to gate on
344 chans <- c(grep(colnames(transformed_data), pattern="FSC"),
345 grep(colnames(transformed_data), pattern="SSC"))
346 totalchans <- chans
347 if (length(chans) > 2) {
348 #get first FSC and corresponding SSC
349 chans <- c(grep(colnames(transformed_data), pattern="FSC-A"),
350 grep(colnames(transformed_data), pattern="SSC-A"))
351 if (length(chans) == 0) {
352 chans <- c(grep(colnames(transformed_data), pattern="FSC-H"),
353 grep(colnames(transformed_data), pattern="SSC-H"))
354 if (length(chans) == 0) {
355 chans <- c(grep(colnames(transformed_data), pattern="FSC-W"),
356 grep(colnames(transformed_data), pattern="SSC-W"))
357 }
358 }
359 }
360 if (length(chans) == 0) {
361 warning('No forward/side scatter channels found, gating aborted.')
362 } else {
363 # gate lymphocytes
364 lymph <- flowDensity(obj=transformed_data, channels=chans,
365 position=c(TRUE, NA),
366 debris.gate=c(TRUE, FALSE))
367 # gate singlets if A and H/W
368 if (length(totalchans) > 2) {
369 trans_gated_data <- getflowFrame(flowDensity(obj=lymph,
370 singlet.gate=TRUE))
371 } else {
372 trans_gated_data <- getflowFrame(lymph)
373 }
374 # report
375 pregating_summary <- capture.output(summary(transformed_data))
376 pregating_dim <- capture.output(dim(transformed_data))
377 postgating_summary <- capture.output(summary(trans_gated_data))
378 postgating_dim <- capture.output(dim(trans_gated_data))
379 sink(report)
380 cat("#########################\n")
381 cat("## BEFORE GATING ##\n")
382 cat("#########################\n")
383 cat(pregating_dim, pregating_summary, sep="\n")
384 cat("\n#########################\n")
385 cat("## AFTER GATING ##\n")
386 cat("#########################\n")
387 cat(postgating_dim, postgating_summary, sep="\n")
388 sink()
389 # plots
390 time_channel <- grep(toupper(colnames(transformed_data)), pattern="TIME")
391 nb_markers <- length(colnames(transformed_data)) - length(time_channel)
392 nb_rows <- ceiling(((nb_markers-1)*nb_markers)/4)
393 h <- 400 * nb_rows
394 maxrange <- transformed_data@parameters@data$range[1]
395
396 png(graph_file, type="cairo", height=h, width=800)
397 par(mfrow=c(nb_rows,2))
398 for (m in 1:(nb_markers - 1)) {
399 for (n in (m+1):nb_markers) {
400 plotDens(transformed_data, c(m,n), xlab = print_markers[m],
401 ylab = print_markers[n], main = "Before Gating",
402 ylim = c(0, maxrange), xlim = c(0, maxrange))
403 plotDens(trans_gated_data, c(m,n), xlab = print_markers[m],
404 ylab = print_markers[n], main = "After Gating",
405 ylim = c(0, maxrange), xlim = c(0, maxrange))
406 }
407 }
408 dev.off()
409 }
410 }
411 if (outformat=="FCS") {
412 write.FCS(trans_gated_data, output_file)
413 } else if (outformat=="flowFrame") {
414 saveRDS(trans_gated_data, file = output_file)
415 } else {
416 output_data <- exprs(trans_gated_data)
417 colnames(output_data) <- print_markers
418 write.table(output_data, file=output_file, quote=F,
419 row.names=F,col.names=T, sep='\t', append=F)
420 }
421 }
422 # Convert FCS file using FCSTrans logicile transformation
423 # @param input_file FCS file to be transformed
424 # @param output_file FCS file transformed ".txt" extension
425 # @param compensate Flag indicating whether to apply compensation
426 # matrix if it exists.
427 transformFCS <- function(input_file, output_file, compensate=FALSE,
428 outformat="flowtext", gate=FALSE, graph_file="",
429 report_file="", trans_met="", scaling_factor=1/150,
430 w=0.5, t=262144, m=4.5, debug=FALSE) {
431 isValid <- F
432 # Check file beginning matches FCS standard
433 tryCatch({
434 isValid <- isFCSfile(input_file)
435 }, error = function(ex) {
436 print (paste(" ! Error in isFCSfile", ex))
437 })
438 if (isValid) {
439 processFCSFile(input_file, output_file, compensate, outformat,
440 gate, graph_file, report_file, trans_met, scaling_factor,
441 w, t, m)
442 } else {
443 print (paste(input_file, "does not meet FCS standard"))
444 }
445 }
446 #
447 # Run FCS Gate-Trans
448 #
449 args <- commandArgs(trailingOnly = TRUE)
450 graphs <- ""
451 report <- ""
452 gate <- FALSE
453 trans_method <- "None"
454 scaling_factor <- 1 / 150
455 w <- 0.5
456 t <- 262144
457 m <- 4.5
458 if (args[5]!="None") {
459 gate <- TRUE
460 graphs <- args[5]
461 report <- args[6]
462 }
463 if (args[7]!="None"){
464 trans_method <- args[7]
465 if (args[7] == "arcsinh"){
466 scaling_factor <- 1 / as.numeric(args[8])
467 } else if (args[7] == "logicle"){
468 w <- args[8]
469 t <- args[9]
470 m <- args[10]
471 }
472 }
473 transformFCS(args[1], args[2], args[3], args[4], gate, graphs,
474 report, trans_method, scaling_factor, w, t, m)