comparison FlowSOMMApIndividualFCS.R @ 0:e796ed5dfd02 draft

"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flowsom_cross_comp commit 2f04d23cb47d9b233d159c54ff406e56c87746e0"
author azomics
date Tue, 23 Jun 2020 19:49:33 -0400
parents
children a1054bd1060a
comparison
equal deleted inserted replaced
-1:000000000000 0:e796ed5dfd02
1 #!/usr/bin/Rscript
2 # Module for Galaxy
3 # Generates FlowSOM reference tree
4 # with FlowSOM AggregateFlowFrames
5 ######################################################################
6 # Copyright (c) 2017 Northrop Grumman.
7 # All rights reserved.
8 ######################################################################
9 #
10 # Version 1
11 # Cristel Thomas
12 #
13 #
14 library(FlowSOM)
15 library(flowCore)
16
17 ## geometric mean from
18 # https://stackoverflow.com/questions/2602583/geometric-mean-is-there-a-built-in
19 gm_mean = function(x, na.rm=TRUE){
20 exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
21 }
22
23 prettyMarkerNames <- function(flowFrame){
24 n <- flowFrame@parameters@data[, "name"]
25 d <- flowFrame@parameters@data[, "desc"]
26 d[is.na(d)] <- n[is.na(d)]
27 prettyNames <-list()
28 if(any(grepl("#",d))){
29 # Support for hashtag notation:
30 # antibody#fluorochrome -> antibody (fluorochrome)
31 prettyNames <- gsub("#(.*)$"," (\\1)",d)
32 } else {
33 prettyNames <- paste(d, " <", n, ">", sep="")
34 }
35 return(prettyNames)
36 }
37
38 checkMarkers <- function(fcsfiles, flag_ff=FALSE){
39 markerCheck <- T
40 for (i in 1:length(fcsfiles)){
41 if (flag_ff){
42 fcs <- readRDS(fcsfiles[i])
43 } else {
44 fcs <- read.FCS(fcsfiles[i], transformation=FALSE)
45 }
46 if (i == 1) {
47 m1 <- as.vector(pData(parameters(fcs))$desc)
48 } else {
49 m2 <- as.vector(pData(parameters(fcs))$desc)
50 if (is.na(all(m1==m2))) {
51 mm1 <- is.na(m1)
52 mm2 <- is.na(m2)
53 if (all(mm1==mm2)){
54 if (!all(m1==m2, na.rm=TRUE)){
55 markerCheck <- F
56 }
57 } else {
58 markerCheck <- F
59 }
60 } else if (!all(m1==m2)) {
61 markerCheck <- F
62 }
63 }
64 }
65 if (!markerCheck) {
66 quit(save = "no", status = 13, runLast = FALSE)
67 }
68 }
69
70 mapToTree <- function(filenames, filepaths, flag_ff=FALSE, reftree, cluster=10,
71 outdir="",flag_meta=FALSE, mfi='mfi', stat1="", stat2="",
72 stat3="", plot="", mplot="") {
73
74 checkMarkers(filepaths, flag_ff)
75
76 # get tree
77 fst <- readRDS(reftree)
78 plots <- FALSE
79 mplots <- FALSE
80 dir.create(outdir)
81 if (!plot==""){
82 dir.create(plot)
83 plots <- TRUE
84 }
85 if (!mplot==""){
86 dir.create(mplot)
87 mplots <- TRUE
88 }
89 metaC <- metaClustering_consensus(fst$map$codes, k=cluster, seed=33)
90 nb_pop <- if (flag_meta) cluster else max(fst$map$mapping[,1])
91 nb_samples <- length(filepaths)
92 nb_marker <- length(fst$prettyColnames)
93 print_markers <- gsub(' <.*>','',fst$prettyColnames)
94 print_markers_ff <- append(print_markers, "Population")
95
96 m_stat1 <- matrix(0, nrow=nb_samples, ncol=(nb_pop+2))
97 colnames(m_stat1) <- c("FileID", "SampleName", seq_len(nb_pop))
98
99 sink(stat2)
100 cat(print_markers, sep="\t")
101 cat("\tPercentage\tPopulation\tSampleName\n")
102 sink()
103
104 col_m3 <- c("Population")
105 for (m in print_markers){
106 m1 <- paste(m, "mean", sep="_")
107 m2 <- paste(m, "median", sep="_")
108 m3 <- paste(m, "stdev", sep="_")
109 col_m3 <- append(col_m3, c(m1,m2,m3))
110 }
111 col_stat3 <- c(col_m3, "Percentage_mean","Percentage_median","Percentage_stdev")
112
113 for (i in 1:length(filepaths)){
114 if (flag_ff){
115 ff <- readRDS(filepaths[i])
116 } else {
117 ff <- read.FCS(filepaths[i], transformation=FALSE)
118 }
119 if (i==1) {
120 # compare to tree markers
121 pm <- prettyMarkerNames(ff)
122 if (!all(fst$prettyColnames %in% pm)){
123 quit(save = "no", status = 14, runLast = FALSE)
124 }
125 }
126
127 fsom <- NewData(fst, ff)
128
129 if (mplots){
130 markers <- colnames(ff)
131 tmpmplot <- paste(filenames[i], "marker_plots.pdf", sep="_")
132 pdf(file.path(mplot,tmpmplot), useDingbats=FALSE, onefile=TRUE)
133 for (marker in markers){
134 PlotMarker(fsom, marker)
135 }
136 dev.off()
137 }
138
139 if (!plot==""){
140 plotpath <- paste(filenames[i], "tree.png", sep="_")
141 png(file.path(plot, plotpath), type="cairo", height=800, width=800)
142 PlotStars(fsom, backgroundValues = as.factor(metaC))
143 dev.off()
144 }
145
146 m <- matrix(0,nrow=nrow(ff),ncol=1)
147 s <- seq_len(nrow(ff))
148 if (flag_meta){
149 m[s,] <- metaC[fsom$map$mapping[,1]]
150 } else {
151 m[s,] <- fsom$map$mapping[,1]
152 }
153 colnames(m) <- "FlowSOM"
154 ff <- cbind2(ff,m)
155 out <- exprs(ff)
156 colnames(out) <- print_markers_ff
157
158 clr_table <- paste(filenames[i], "clustered.flowclr", sep="_")
159 write.table(out, file=file.path(outdir, clr_table), quote=F, row.names=F, col.names=T, sep='\t',
160 append=F)
161
162 cluster_count <- table(out[,"Population"])
163 cluster_prop <- prop.table(cluster_count)*100
164 m1_tmp <- numeric(nb_pop)
165 for (j in 1:nb_pop){
166 if (as.character(j) %in% names(cluster_count)){
167 m1_tmp[[j]] <- format(round(cluster_prop[[as.character(j)]], 2), nsmall=2)
168 }
169 }
170 samplename <- paste("Sample", i, sep="")
171 m_stat1[i,] <- c(filenames[i], samplename, m1_tmp)
172 # flowstat2
173 # Marker1 Marker2 Marker3 ... Population Percentage SampleName
174 # MFIs for each marker
175 # dimension ==> col = nb of markers + 3; row = nb of files * nb of clusters
176 if (mfi=="mfi"){
177 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), mean)
178 } else if (mfi=="mdfi") {
179 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), median)
180 } else {
181 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), gm_mean)
182 }
183
184 m2["Percentage"] <- as.character(cluster_prop)
185 m2["Population"] <- as.character(m2$Group.1)
186 m2["SampleName"] <- samplename
187 m2t <- as.matrix(m2[2:length(m2)])
188 write.table(m2t, file=stat2, quote=F, row.names=F, col.names=F, sep='\t',
189 append=T)
190
191 }
192 write.table(m_stat1, file=stat1, quote=F, row.names=F, col.names=T, sep='\t',
193 append=F)
194
195 m2df <- read.table(stat2, sep="\t", header=T)
196 ag <- aggregate(m2df[,0:nb_marker+1], list(m2df[,nb_marker+2]), function(x) c(mn=mean(x), med=median(x), stdv=sd(x)))
197 m3t <- as.matrix(ag)
198 colnames(m3t) <- col_stat3
199 write.table(m3t, file=stat3, quote=F, row.names=F, col.names=T, sep='\t',
200 append=F)
201 }
202
203 flowFrameOrFCS <- function(filenames, filepaths, reftree, cluster=10, outdir="",
204 flag_meta=FALSE, mfi='mfi', stat1="", stat2="",
205 stat3="", plot="", mplot=""){
206
207 isValid <- F
208 is_fcs <- F
209 is_ff <- F
210 flag_ff <- FALSE
211 i <- 0
212 for (f in filepaths){
213 tryCatch({
214 is_fcs <- isFCSfile(f)
215 }, error = function(ex) {
216 print(paste(ex))
217 })
218 if (!is_fcs){
219 tryCatch({
220 ff <- readRDS(f)
221 is_ff <- T
222 }, error = function(ex) {
223 print(paste(ex))
224 })
225 } else {
226 i <- i + 1
227 }
228 if (!is_ff && !is_fcs) {
229 quit(save = "no", status = 10, runLast = FALSE)
230 }
231 }
232 if (i==0) {
233 flag_ff <- TRUE
234 } else if (!i==length(filenames)){
235 quit(save = "no", status = 12, runLast = FALSE)
236 }
237 mapToTree(filenames, filepaths, flag_ff, reftree, cluster, outdir, flag_meta,
238 mfi, stat1, stat2, stat3, plot, mplot)
239 }
240
241 args <- commandArgs(trailingOnly = TRUE)
242 plot <- ""
243 mplot <- ""
244 m <- 8
245 flag_meta <- FALSE
246 if (args[4]=='meta') {
247 flag_meta <- TRUE
248 }
249 if (args[9] == 'newDataTrees') {
250 plot <- 'newDataTrees'
251 m <- m + 1
252 if (args[10] == 'newDataMarkers') {
253 mplot <- "newDataMarkers"
254 m <- m + 1
255 }
256 } else if (args[9] == 'newDataMarkers') {
257 mplot <- "newDataMarkers"
258 m <- m + 1
259 }
260
261 n <- m+1
262 # files + filenames
263 nb_files <- (length(args) - m) / 2
264 files1 <- character(nb_files)
265 files2 <- character(nb_files)
266 j <- 1
267 file_list <- args[n:length(args)]
268 for (i in 1:length(file_list)) {
269 if (i%%2){
270 files1[[j]] <- file_list[i]
271 files2[[j]] <- file_list[i+1]
272 j <- j + 1
273 }
274 }
275
276 flowFrameOrFCS(files2, files1, args[1], as.numeric(args[3]), args[2], flag_meta,
277 args[5], args[6], args[7], args[8], plot, mplot)