comparison FlowSOMCompare.R @ 1:33b8673272cf draft default tip

planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flowsom_compare commit bbff20e20dc2b9dbb40b613a0d5f16ee8132446d
author azomics
date Fri, 29 Sep 2023 07:19:42 +0000
parents bd35f3b66a1e
children
comparison
equal deleted inserted replaced
0:bd35f3b66a1e 1:33b8673272cf
1 #!/usr/bin/Rscript 1 #!/usr/bin/env Rscript
2 # Module for Galaxy 2 # Module for Galaxy
3 # Compares groups of FCS to FlowSOM reference tree 3 # Compares groups of FCS to FlowSOM reference tree
4 # with FlowSOM 4 # with FlowSOM
5 ###################################################################### 5 ######################################################################
6 # Copyright (c) 2017 Northrop Grumman. 6 # Copyright (c) 2017 Northrop Grumman.
12 # 12 #
13 # 13 #
14 library(FlowSOM) 14 library(FlowSOM)
15 library(flowCore) 15 library(flowCore)
16 16
17 checkFiles <- function(groups){ 17 check_files <- function(groups) {
18 all_files <- unlist(groups) 18 all_files <- unlist(groups)
19 all_unique <- unique(all_files) 19 all_unique <- unique(all_files)
20 if (length(all_unique) != length(all_files)) { 20 if (length(all_unique) != length(all_files)) {
21 quit(save = "no", status = 14, runLast = FALSE) 21 quit(save = "no", status = 14, runLast = FALSE)
22 } 22 }
23 } 23 }
24 24
25 compareLists <- function(m1, m2){ 25 compare_lists <- function(m1, m2) {
26 listCheck <- T 26 list_check <- TRUE
27 if (is.na(all(m1==m2))) { 27 if (is.na(all(m1 == m2))) {
28 mm1 <- is.na(m1) 28 mm1 <- is.na(m1)
29 mm2 <- is.na(m2) 29 mm2 <- is.na(m2)
30 if (all(mm1==mm2)){ 30 if (all(mm1 == mm2)) {
31 if (!all(m1==m2, na.rm=TRUE)){ 31 if (!all(m1 == m2, na.rm = TRUE)) {
32 listCheck <- F 32 list_check <- FALSE
33 } 33 }
34 } else { 34 } else {
35 listCheck <- F 35 list_check <- FALSE
36 } 36 }
37 } else if (!all(m1==m2)) { 37 } else if (!all(m1 == m2)) {
38 listCheck <- F 38 list_check <- FALSE
39 } 39 }
40 return(listCheck) 40 return(list_check)
41 } 41 }
42 42
43 prettyMarkerNames <- function(flowFrame){ 43 pretty_marker_names <- function(flow_frame) {
44 n <- flowFrame@parameters@data[, "name"] 44 n <- flow_frame@parameters@data[, "name"]
45 d <- flowFrame@parameters@data[, "desc"] 45 d <- flow_frame@parameters@data[, "desc"]
46 d[is.na(d)] <- n[is.na(d)] 46 d[is.na(d)] <- n[is.na(d)]
47 prettyNames <- list() 47 pretty_names <- list()
48 if(any(grepl("#",d))){ 48 if (any(grepl("#", d))) {
49 # Support for hashtag notation: 49 # Support for hashtag notation:
50 # antibody#fluorochrome -> antibody (fluorochrome) 50 pretty_names <- gsub("#(.*)$", " (\\1)", d)
51 prettyNames <- gsub("#(.*)$"," (\\1)",d)
52 } else { 51 } else {
53 prettyNames <- paste(d, " <", n, ">", sep="") 52 pretty_names <- paste(d, " <", n, ">", sep = "")
54 } 53 }
55 return(prettyNames) 54 return(pretty_names)
56 } 55 }
57 56
58 compareToTree <- function(fst, wilc_thresh=0.05, output="", plot="", stats, 57 compare_to_tree <- function(fst,
59 comp_groups, filenames) { 58 wilc_thresh = 0.05, output = "", plot = "", stats,
60 groupRes <- CountGroups(fst, groups=comp_groups, plot=FALSE) 59 comp_groups, filenames) {
61 pdf(plot, useDingbats=FALSE, onefile=TRUE) 60 group_res <- CountGroups(fst, groups = comp_groups, plot = FALSE)
61 pdf(plot, useDingbats = FALSE, onefile = TRUE)
62 tresh <- wilc_thresh 62 tresh <- wilc_thresh
63 pg <- PlotGroups(fst, groupRes, p_tresh=tresh) 63 pg <- PlotGroups(fst, group_res, p_tresh = tresh)
64 dev.off() 64 dev.off()
65 65
66 nb_nodes <- length(pg[[1]]) 66 nb_nodes <- length(pg[[1]])
67 nb_comp <- length(pg) 67 nb_comp <- length(pg)
68 m <- matrix(0, nrow=nb_nodes, ncol=nb_comp+1) 68 m <- matrix(0, nrow = nb_nodes, ncol = nb_comp + 1)
69 s <- seq_len(nb_nodes) 69 s <- seq_len(nb_nodes)
70 m[,1]<- as.character(s) 70 m[, 1] <- as.character(s)
71 for (i in 1:nb_comp){ 71 for (i in 1:nb_comp) {
72 m[s,i+1]<- as.character(pg[[i]]) 72 m[s, i + 1] <- as.character(pg[[i]])
73 } 73 }
74 groupnames <- attr(comp_groups,"names") 74 groupnames <- attr(comp_groups, "names")
75 out_colnames <- paste(groupnames, collapse="-") 75 out_colnames <- paste(groupnames, collapse = "-")
76 colnames(m) <- c("Node",out_colnames) 76 colnames(m) <- c("Node", out_colnames)
77 write.table(m, file=output, quote=F, row.names=F, col.names=T, sep='\t', 77 write.table(m, file = output, quote = FALSE, row.names = FALSE,
78 append=F) 78 col.names = TRUE, sep = "\t",
79 append = FALSE)
79 80
80 ## get filenames 81 ## get filenames
81 filepaths <- unlist(comp_groups) 82 filepaths <- unlist(comp_groups)
82 fnames <- unlist(filenames) 83 fnames <- unlist(filenames)
83 nb_files <- length(filepaths) 84 nb_files <- length(filepaths)
84 comp_files <- list() 85 comp_files <- list()
85 for (i in 1:length(filepaths)){ 86 for (i in seq_along(filepaths)) {
86 comp_files[[filepaths[[i]]]] <- fnames[[i]] 87 comp_files[[filepaths[[i]]]] <- fnames[[i]]
87 } 88 }
88 89
89 group_list <- list() 90 group_list <- list()
90 for (grp in attr(comp_groups, "names")) { 91 for (grp in attr(comp_groups, "names")) {
91 for (f in comp_groups[[grp]]){ 92 for (f in comp_groups[[grp]]) {
92 group_list[[f]] <- grp 93 group_list[[f]] <- grp
93 } 94 }
94 } 95 }
95 out_stats <- attr(stats, "names") 96 out_stats <- attr(stats, "names")
96 if ("counts" %in% out_stats){ 97 if ("counts" %in% out_stats) {
97 gp_counts <- as.matrix(groupRes$counts) 98 gp_counts <- as.matrix(group_res$counts)
98 tpc <- matrix("", nrow=nb_files, ncol=2) 99 tpc <- matrix("", nrow = nb_files, ncol = 2)
99 tpc[,1] <- as.character(lapply(rownames(gp_counts), function(x) comp_files[[x]])) 100 tpc[, 1] <- as.character(
100 tpc[,2] <- as.character(lapply(rownames(gp_counts), function(x) group_list[[x]])) 101 lapply(rownames(gp_counts),
102 function(x) comp_files[[x]]))
103 tpc[, 2] <- as.character(
104 lapply(rownames(gp_counts),
105 function(x) group_list[[x]]))
101 gp_counts <- cbind(tpc, gp_counts) 106 gp_counts <- cbind(tpc, gp_counts)
102 colnames(gp_counts)[[1]] <- "Filename" 107 colnames(gp_counts)[[1]] <- "Filename"
103 colnames(gp_counts)[[2]] <- "Group" 108 colnames(gp_counts)[[2]] <- "Group"
104 t_gp_counts <- t(gp_counts) 109 t_gp_counts <- t(gp_counts)
105 write.table(t_gp_counts, file=stats[["counts"]], quote=F, row.names=T, col.names=F, sep='\t', 110 write.table(t_gp_counts,
106 append=F) 111 file = stats[["counts"]],
107 } 112 quote = FALSE,
108 if ("pctgs" %in% out_stats){ 113 row.names = TRUE,
109 gp_prop <- as.matrix(groupRes$pctgs) 114 col.names = FALSE,
110 tpp <- matrix("", nrow=nb_files, ncol=2) 115 sep = "\t",
111 tpp[,1] <- as.character(lapply(rownames(gp_prop), function(x) comp_files[[x]])) 116 append = FALSE)
112 tpp[,2] <- as.character(lapply(rownames(gp_prop), function(x) group_list[[x]])) 117 }
118 if ("pctgs" %in% out_stats) {
119 gp_prop <- as.matrix(group_res$pctgs)
120 tpp <- matrix("", nrow = nb_files, ncol = 2)
121 tpp[, 1] <- as.character(
122 lapply(rownames(gp_prop),
123 function(x) comp_files[[x]]))
124 tpp[, 2] <- as.character(
125 lapply(rownames(gp_prop),
126 function(x) group_list[[x]]))
113 gp_prop <- cbind(tpp, gp_prop) 127 gp_prop <- cbind(tpp, gp_prop)
114 colnames(gp_prop)[[1]] <- "Filename" 128 colnames(gp_prop)[[1]] <- "Filename"
115 colnames(gp_prop)[[2]] <- "Group" 129 colnames(gp_prop)[[2]] <- "Group"
116 t_gp_prop <- t(gp_prop) 130 t_gp_prop <- t(gp_prop)
117 write.table(t_gp_prop, file=stats[["pctgs"]], quote=F, row.names=T, col.names=F, sep='\t', 131 write.table(t_gp_prop,
118 append=F) 132 file = stats[["pctgs"]],
119 } 133 quote = FALSE,
120 if ("means" %in% out_stats){ 134 row.names = TRUE,
121 gp_mean <- as.matrix(groupRes$means) 135 col.names = FALSE,
136 sep = "\t",
137 append = FALSE)
138 }
139 if ("means" %in% out_stats) {
140 gp_mean <- as.matrix(group_res$means)
122 t_gp_mean <- t(gp_mean) 141 t_gp_mean <- t(gp_mean)
123 tpm <- matrix(0, nrow=nb_nodes, ncol=1) 142 tpm <- matrix(0, nrow = nb_nodes, ncol = 1)
124 tpm[,1] <- seq_len(nb_nodes) 143 tpm[, 1] <- seq_len(nb_nodes)
125 t_gp_mean <- cbind(tpm, t_gp_mean) 144 t_gp_mean <- cbind(tpm, t_gp_mean)
126 colnames(t_gp_mean)[[1]] <- "Nodes" 145 colnames(t_gp_mean)[[1]] <- "Nodes"
127 write.table(t_gp_mean, file=stats[["means"]], quote=F, row.names=F, col.names=T, sep='\t', 146 write.table(t_gp_mean,
128 append=F) 147 file = stats[["means"]],
129 } 148 quote = FALSE,
130 if ("medians" %in% out_stats){ 149 row.names = FALSE,
131 gp_med <- as.matrix(groupRes$medians) 150 col.names = TRUE,
151 sep = "\t",
152 append = FALSE)
153 }
154 if ("medians" %in% out_stats) {
155 gp_med <- as.matrix(group_res$medians)
132 t_gp_med <- t(gp_med) 156 t_gp_med <- t(gp_med)
133 tpd <- matrix(0, nrow=nb_nodes, ncol=1) 157 tpd <- matrix(0, nrow = nb_nodes, ncol = 1)
134 tpd[,1] <- seq_len(nb_nodes) 158 tpd[, 1] <- seq_len(nb_nodes)
135 t_gp_med <- cbind(tpd, t_gp_med) 159 t_gp_med <- cbind(tpd, t_gp_med)
136 colnames(t_gp_med)[[1]] <- "Nodes" 160 colnames(t_gp_med)[[1]] <- "Nodes"
137 write.table(t_gp_med, file=stats[["medians"]], quote=F, row.names=F, col.names=T, sep='\t', 161 write.table(t_gp_med,
138 append=F) 162 file = stats[["medians"]],
139 } 163 quote = FALSE,
140 } 164 row.names = FALSE,
141 165 col.names = TRUE,
142 checkFCS <- function(tree, output="", plot="", thresh = 0.05, stats, groups, 166 sep = "\t",
143 filenames) { 167 append = FALSE)
168 }
169 }
170
171 check_fcs <- function(tree,
172 output = "", plot = "", thresh = 0.05, stats, groups,
173 filenames) {
144 174
145 fcsfiles <- unlist(groups) 175 fcsfiles <- unlist(groups)
146 tree_valid <- F 176 tree_valid <- FALSE
147 markerCheck <- T 177 marker_check <- TRUE
148 tryCatch({ 178 tryCatch({
149 fsomtree <- readRDS(tree) 179 fsomtree <- readRDS(tree)
150 tree_valid <- T 180 tree_valid <- TRUE
151 }, error = function(ex) { 181 }, error = function(ex) {
152 print(paste(ex)) 182 print(paste(ex))
153 }) 183 })
154 184
155 fst <- if (length(fsomtree)==2) fsomtree[[1]] else fsomtree 185 fst <- if (length(fsomtree) == 2) fsomtree[[1]] else fsomtree
156 186
157 if (tree_valid){ 187 if (tree_valid) {
158 tree_markers <- as.vector(fst$prettyColnames) 188 tree_markers <- as.vector(fst$prettyColnames)
159 tree_channels <- as.vector(colnames(fst$data)) 189 if (length(tree_markers) < 1) {
160 if (length(tree_markers) < 1){
161 quit(save = "no", status = 11, runLast = FALSE) 190 quit(save = "no", status = 11, runLast = FALSE)
162 } 191 }
163 } else { 192 } else {
164 quit(save = "no", status = 11, runLast = FALSE) 193 quit(save = "no", status = 11, runLast = FALSE)
165 } 194 }
166 195
167 for (i in 1:length(fcsfiles)){ 196 for (i in seq_along(fcsfiles)) {
168 is_file_valid <- F
169 tryCatch({ 197 tryCatch({
170 fcs <- read.FCS(fcsfiles[i], transformation=FALSE) 198 fcs <- read.FCS(fcsfiles[i], transformation = FALSE)
171 is_file_valid <- T
172 }, error = function(ex) { 199 }, error = function(ex) {
173 print(paste(ex)) 200 print(paste(ex))
174 }) 201 })
175 if (i == 1) { 202 if (i == 1) {
176 m1 <- as.vector(pData(parameters(fcs))$desc) 203 m1 <- as.vector(pData(parameters(fcs))$desc)
177 c1 <- colnames(fcs) 204 c1 <- colnames(fcs)
178 # compare to tree markers 205 # compare to tree markers
179 pm <- prettyMarkerNames(fcs) 206 pm <- pretty_marker_names(fcs)
180 if (!all(tree_markers %in% pm)){ 207 if (!all(tree_markers %in% pm)) {
181 quit(save = "no", status = 13, runLast = FALSE) 208 quit(save = "no", status = 13, runLast = FALSE)
182 } 209 }
183 } else { 210 } else {
184 m2 <- as.vector(pData(parameters(fcs))$desc) 211 m2 <- as.vector(pData(parameters(fcs))$desc)
185 c2 <- colnames(fcs) 212 c2 <- colnames(fcs)
186 markerCheck <- compareLists(m1,m2) 213 marker_check <- compare_lists(m1, m2)
187 markerChannel <- compareLists(c1,c2) 214 marker_channel <- compare_lists(c1, c2)
188 } 215 }
189 } 216 }
190 if (markerCheck && markerChannel) { 217 if (marker_check && marker_channel) {
191 compareToTree(fst, thresh, output, plot, stats, groups, filenames) 218 compare_to_tree(fst, thresh, output, plot, stats, groups, filenames)
192 } else { 219 } else {
193 quit(save = "no", status = 12, runLast = FALSE) 220 quit(save = "no", status = 12, runLast = FALSE)
194 } 221 }
195 } 222 }
196 223
199 first_g1 <- 5 226 first_g1 <- 5
200 tot_args <- length(args) 227 tot_args <- length(args)
201 g <- list() 228 g <- list()
202 tmplist <- c("counts", "means", "medians", "pctgs") 229 tmplist <- c("counts", "means", "medians", "pctgs")
203 230
204 for (i in 5:13){ 231 for (i in 5:13) {
205 if (args[i] %in% tmplist){ 232 if (args[i] %in% tmplist) {
206 first_g1 <- first_g1 + 2 233 first_g1 <- first_g1 + 2
207 g[[args[i]]] <- args[i+1] 234 g[[args[i]]] <- args[i + 1]
208 } 235 }
209 } 236 }
210 237
211 tmpargs <- paste(args[first_g1:tot_args], collapse="=%=") 238 tmpargs <- paste(args[first_g1:tot_args], collapse = "=%=")
212 tmpgroups <- strsplit(tmpargs, "=%=DONE=%=") 239 tmpgroups <- strsplit(tmpargs, "=%=DONE=%=")
213 240
214 groups <- list() 241 groups <- list()
215 filenames <- list() 242 filenames <- list()
216 for (gps in tmpgroups[[1]]) { 243 for (gps in tmpgroups[[1]]) {
217 tmpgroup <- strsplit(gps, "=%=") 244 tmpgroup <- strsplit(gps, "=%=")
218 nb_files <- (length(tmpgroup[[1]]) - 1 ) /2 245 nb_files <- (length(tmpgroup[[1]]) - 1) / 2
219 tmplist <- character(nb_files) 246 tmplist <- character(nb_files)
220 tmpnames <- character(nb_files) 247 tmpnames <- character(nb_files)
221 j <- 1 248 j <- 1
222 for (i in 2:length(tmpgroup[[1]])){ 249 for (i in 2:length(tmpgroup[[1]])) {
223 if (!i%%2){ 250 if (!i %% 2) {
224 tmplist[[j]] <- tmpgroup[[1]][i] 251 tmplist[[j]] <- tmpgroup[[1]][i]
225 tmpnames[[j]]<- tmpgroup[[1]][i+1] 252 tmpnames[[j]] <- tmpgroup[[1]][i + 1]
226 j <- j + 1 253 j <- j + 1
227 } 254 }
228 } 255 }
229 groups[[tmpgroup[[1]][1]]] <- tmplist 256 groups[[tmpgroup[[1]][1]]] <- tmplist
230 filenames[[tmpgroup[[1]][1]]] <- tmpnames 257 filenames[[tmpgroup[[1]][1]]] <- tmpnames
231 } 258 }
232 259
233 checkFiles(groups) 260 check_files(groups)
234 checkFCS(args[1], args[2], args[3], args[4], g, groups, filenames) 261 check_fcs(args[1], args[2], args[3], args[4], g, groups, filenames)