comparison metacyto_autocluster.R @ 0:c744db871f90 draft default tip

"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/master/flowtools/metacyto_autocluster commit 3cc1083d473530ed4f7439d590568baa51a46857"
author azomics
date Tue, 27 Jul 2021 23:00:49 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c744db871f90
1 #!/usr/bin/env Rscript
2 ######################################################################
3 # Copyright (c) 2018 Northrop Grumman.
4 # All rights reserved.
5 ######################################################################
6 #
7 # Version 1 - January 2018
8 # Author: Cristel Thomas
9 #
10 #
11
12 library(flowCore)
13 library(MetaCyto)
14
15 check_cluster_def <- function(cl_def) {
16 if (cl_def == "" || cl_def == "None") {
17 quit(save = "no", status = 14, runLast = FALSE)
18 } else {
19 tmp <- gsub(" ", "", cl_def, fixed = TRUE)
20 clean_def <- gsub(",", "|", tmp, fixed = TRUE)
21 return(toupper(clean_def))
22 }
23 }
24
25 path_to_group_file <- function(path_to_result) {
26 grp <- basename(dirname(path_to_result))
27 return(paste(grp, "fcs", sep = ".", collapse = NULL))
28 }
29
30 group_file_to_group_name <- function(result_file) {
31 return(strsplit(result_file, ".", fixed = TRUE)[[1]][1])
32 }
33
34 auto_cluster_panels <- function(params, df, fcspaths, fcsnames, quant=0.95,
35 events=0.05, cluster_algorithm="FlowSOM",
36 clusters=vector(), outdir="", list_clust="",
37 metacluster=40, xdim=10, ydim=10, seed=42, uc="") {
38
39 working_dir <- "tmp_metacyto"
40 working_out <- "tmp_metacyto_out"
41 dir.create(working_dir)
42 dir.create(outdir)
43
44 # get nb of groups
45 nb_groups <- length(fcsnames)
46
47 # reformat summary -- expects csv + 'fcs_names' && 'fcs_files'
48 new_df <- file.path(working_dir, "processed_sample_summary.csv")
49 df$fcs_names <- df$filenames
50 df$fcs_files <- df$filenames
51 write.csv(df, file = new_df, row.names = F)
52
53 # move && rename FCS files to same directory
54 for (i in seq_len(length(fcspaths))) {
55 new_file <- file.path(working_dir, fcsnames[[i]])
56 if (!grepl(".fcs$", new_file)) {
57 new_file <- paste0(new_file, ".fcs")
58 }
59 file.copy(fcspaths[[i]], new_file)
60 }
61
62 #### will need to add other parameters when Zicheng has them working.
63 if (cluster_algorithm == "FlowSOM") {
64 cluster_label <- autoCluster.batch(preprocessOutputFolder = working_dir,
65 excludeClusterParameters = params,
66 labelQuantile = quant,
67 clusterFunction = flowSOM.MC,
68 minPercent = events,
69 k = metacluster,
70 xdim = xdim,
71 ydim = ydim,
72 seed = seed)
73
74 } else {
75 cluster_label <- autoCluster.batch(preprocessOutputFolder = working_dir,
76 excludeClusterParameters = params,
77 labelQuantile = quant,
78 clusterFunction = flowHC,
79 minPercent = events)
80 }
81
82
83 # Add potential user-defined label to cluster definitions
84 if (length(clusters) > 1) {
85 cluster_label <- c(cluster_label, clusters)
86 }
87 write.table(cluster_label, list_clust, quote = F, row.names = F, col.names = F)
88
89 # Derive summary statistics for the clusters
90 # Result will be written out to the directory speficied by the "outpath" argument
91 searchCluster.batch(preprocessOutputFolder = working_dir,
92 outpath = working_out,
93 clusterLabel = cluster_label)
94
95 result_files <- list.files(working_out,
96 pattern = "cluster_stats_in_each_sample",
97 recursive = T,
98 full.names = T)
99 no_results <- vector()
100 if (length(result_files) != nb_groups) {
101 groups_with_results <- sapply(result_files, path_to_group_file)
102 ## one or more groups with no results, figure out which
103 no_results <- setdiff(fcsnames, groups_with_results)
104 }
105
106 if (length(no_results) == nb_groups) {
107 sink(uc)
108 cat("No clusters were found in none of the groups.")
109 sink()
110 } else {
111 unused_clrs <- list()
112 if (length(no_results > 0)) {
113 grp_no_results <- sapply(no_results, group_file_to_group_name)
114 unused_clrs <- data.frame("cluster_label" = "any", "not_found_in" = grp_no_results)
115 }
116 for (result in result_files) {
117 group_name <- strsplit(result, .Platform$file.sep)[[1]][2]
118 new_filename <- paste(c(group_name, "cluster_stats.txt"), collapse = "_")
119 new_path <- file.path(outdir, new_filename)
120 tmp_df <- read.csv(result)
121
122 used_clr <- as.character(unique(tmp_df$label))
123 if (length(used_clr) != length(cluster_label)) {
124 unused <- setdiff(cluster_label, used_clr)
125 tmp_udf <- data.frame("cluster_label" = unused, "not_found_in" = group_name)
126 unused_clrs <- rbind(unused_clrs, tmp_udf)
127 }
128 colnames(tmp_df)[[1]] <- "group_name"
129 write.table(tmp_df, new_path, quote = F, row.names = F, col.names = T, sep = "\t")
130 }
131
132 if (is.null(dim(unused_clrs))) {
133 sink(uc)
134 cat("All provided cluster definition were found in provided FCS files.")
135 sink()
136 } else {
137 write.table(unused_clrs, uc, quote = F, row.names = F, col.names = T, sep = "\t")
138 }
139 }
140 }
141
142 check_input <- function(params = vector(), report = "", fcs_files = list(),
143 grp_names = list(), quant = 0.95, events = 0.05,
144 cluster_algorithm = "FlowSOM", clusters = vector(), outdir = "",
145 list_clust = "", metacluster = 40, xdim = 10, ydim = 10,
146 seed = 42, unused = "") {
147 # check FCS files
148 fcspaths <- unlist(fcs_files)
149 fcsnames <- unlist(grp_names)
150 ct_files <- 0
151 some_pb <- FALSE
152 for (i in seq_len(length(fcspaths))) {
153 is_file_valid <- FALSE
154 tryCatch({
155 fcs <- read.FCS(fcspaths[[i]], transformation = FALSE)
156 is_file_valid <- TRUE
157 }, error = function(ex) {
158 print(paste("File is not a valid FCS file:", fcsnames[[i]], ex))
159 })
160 if (is_file_valid) {
161 metacyto_pp_check <- if ("sample_id" %in% colnames(fcs)) TRUE else FALSE
162 if (metacyto_pp_check) {
163 idx <- length(colnames(fcs))
164 ct_files <- ct_files + max(fcs@exprs[, idx])
165 } else {
166 quit(save = "no", status = 11, runLast = FALSE)
167 }
168 } else {
169 some_pb <- TRUE
170 }
171 }
172 # check summary file format
173 df <- read.table(report, sep = "\t", header = T, colClasses = "character")
174 nm <- colnames(df)
175 check_ab <- if ("antibodies" %in% nm) TRUE else FALSE
176 check_sdy <- if ("study_id" %in% nm) TRUE else FALSE
177
178 if (check_sdy && check_ab) {
179 # check that summary index compatible with FCSs in collection - by number of files == index nb?
180 if (ct_files != length(df$antibodies)) {
181 quit(save = "no", status = 12, runLast = FALSE)
182 }
183 } else {
184 quit(save = "no", status = 13, runLast = FALSE)
185 }
186
187 if (some_pb) {
188 quit(save = "no", status = 10, runLast = FALSE)
189 } else {
190 auto_cluster_panels(params, df, fcspaths, fcsnames, quant, events,
191 cluster_algorithm, clusters, outdir, list_clust,
192 metacluster, xdim, ydim, seed, unused)
193 }
194 }
195
196 ################################################################################
197 ################################################################################
198 args <- commandArgs(trailingOnly = TRUE)
199
200 ex_param <- c("FSC-A", "FSC-W", "FSC-H", "FSC", "SSC", "SSC-A", "SSC-W",
201 "SSC-H", "Time", "Cell_length", "cell_length", "CELL_LENGTH")
202
203 if (args[5] != "" && args[5] != "None") {
204 tmp <- gsub(" ", "", args[5], fixed = TRUE)
205 eparam <- unlist(strsplit(tmp, ","))
206 ex_param <- toupper(eparam)
207 }
208
209 i <- grep(args, pattern = "PARAM")
210 ii <- grep(args, pattern = "FCS_FILES")
211
212 cluster_def <- vector()
213 if (i > 8) {
214 id <- i - 1
215 cl_df <- args[8:id]
216 cluster_def <- sapply(cl_df, check_cluster_def)
217 }
218
219 metacluster <- 40
220 xdim <- 10
221 ydim <- 10
222 seed <- 42
223 if (i + 1 != ii) {
224 metacluster <- as.numeric(args[i + 1])
225 xdim <- as.numeric(args[i + 2])
226 ydim <- as.numeric(args[i + 3])
227 seed <- as.numeric(args[i + 4])
228 }
229
230 fcs_files <- list()
231 fcs_names <- list()
232 j <- 1
233 m <- ii + 1
234 n <- length(args) - 1
235 tmp_fcs <- args[m:n]
236
237 for (k in seq_len(length(tmp_fcs))) {
238 if (k %% 2) {
239 fcs_files[[j]] <- tmp_fcs[[k]]
240 fcs_names[[j]] <- tmp_fcs[[k + 1]]
241 j <- j + 1
242 }
243 }
244
245 check_input(ex_param, args[1], fcs_files, fcs_names, as.numeric(args[3]),
246 as.numeric(args[4]), args[2], cluster_def, args[6], args[7],
247 metacluster, xdim, ydim, seed, args[length(args)])