comparison metacyto_search_clr.R @ 0:94ac403d134a draft default tip

"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/master/flowtools/metacyto_search_clr commit a1b796a09f6b30919a73b5ded0ce5a6378317007"
author azomics
date Wed, 28 Jul 2021 22:02:38 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:94ac403d134a
1 #!/usr/bin/env Rscript
2 ######################################################################
3 # Copyright (c) 2018 Northrop Grumman.
4 # All rights reserved.
5 ######################################################################
6 #
7 # Version 1 - January 2018
8 # Author: Cristel Thomas
9 #
10 #
11
12 library(flowCore)
13 library(MetaCyto)
14
# Normalise a single user-supplied cluster definition.
#
# An empty definition, or the literal "None", terminates the R session with
# exit status 14. Otherwise every space is removed, every "," is replaced by
# "|", and the result is returned in upper case.
check_cluster_def <- function(cl_def) {
  is_missing <- cl_def == "" || cl_def == "None"
  if (is_missing) {
    # quit() never returns, so no else branch is required below.
    quit(save = "no", status = 14, runLast = FALSE)
  }
  without_spaces <- gsub(" ", "", cl_def, fixed = TRUE)
  toupper(gsub(",", "|", without_spaces, fixed = TRUE))
}
24
# Build the "<group>.fcs" file name that corresponds to a result path, where
# <group> is the name of the directory directly containing the result file.
path_to_group_file <- function(path_to_result) {
  containing_dir <- dirname(path_to_result)
  paste0(basename(containing_dir), ".fcs")
}
29
# Return the part of a group file name that precedes its first "." character.
# Only the first element of `result_file` is examined.
group_file_to_group_name <- function(result_file) {
  pieces_per_name <- strsplit(result_file, ".", fixed = TRUE)
  first_name_pieces <- pieces_per_name[[1]]
  first_name_pieces[1]
}
33
34
# Run the MetaCyto cluster search over a set of FCS files, write one
# statistics table per group, and report cluster definitions that produced
# no result.
#
# Arguments:
#   df        sample summary data frame; its `filenames` column is copied
#             into `fcs_names` and `fcs_files` before the frame is written
#             as the CSV handed to searchCluster.batch().
#   fcspaths  paths of the input FCS files.
#   fcsnames  names the files are copied under (same order as `fcspaths`).
#   outdir    directory receiving the "<group>_cluster_stats.txt" tables.
#   uc        path of the report listing clusters / groups without results.
#   clusters  character vector of cluster definitions to search for.
#
# Intermediate files are written to "tmp_metacyto" and "tmp_metacyto_out",
# relative to the current working directory. Called for its side effects;
# returns NULL invisibly.
search_cluster_panels <- function(df, fcspaths, fcsnames, outdir = "", uc = "",
                                  clusters = vector()) {
  working_dir <- "tmp_metacyto"
  working_out <- "tmp_metacyto_out"
  if (!dir.exists(working_dir)) {
    dir.create(working_dir)
  }
  if (!dir.exists(outdir)) {
    dir.create(outdir, recursive = TRUE)
  }

  # reformat summary -- expects csv + 'fcs_names' && 'fcs_files'
  new_df <- file.path(working_dir, "processed_sample_summary.csv")
  df$fcs_names <- df$filenames
  df$fcs_files <- df$filenames
  write.csv(df, file = new_df, row.names = FALSE)

  # move && rename FCS files to same directory
  for (i in seq_along(fcspaths)) {
    file.copy(fcspaths[[i]], file.path(working_dir, fcsnames[[i]]))
  }

  searchCluster.batch(preprocessOutputFolder = working_dir,
                      outpath = working_out,
                      clusterLabel = clusters)

  result_files <- list.files(working_out,
                             pattern = "cluster_stats_in_each_sample",
                             recursive = TRUE,
                             full.names = TRUE)

  nb_groups <- length(fcsnames)
  no_results <- vector()
  if (length(result_files) != nb_groups) {
    # one or more groups with no results, figure out which
    groups_with_results <- vapply(result_files, path_to_group_file,
                                  character(1))
    no_results <- setdiff(fcsnames, groups_with_results)
  }

  if (length(no_results) == nb_groups) {
    # cat(file = ) instead of a sink()/sink() pair: nothing is left diverted
    # if an error occurs while writing.
    cat("No clusters were found in none of the groups.", file = uc)
    return(invisible(NULL))
  }

  # Pieces of the "not found" report; bound together once at the end.
  unused_parts <- list()

  if (length(no_results) > 0) {
    grp_no_results <- vapply(no_results, group_file_to_group_name,
                             character(1))
    unused_parts[[1]] <- data.frame(cluster_label = "any",
                                    not_found_in = unname(grp_no_results))
  }

  for (result in result_files) {
    # The group is the path component right below the output directory.
    group_name <- strsplit(result, .Platform$file.sep)[[1]][2]
    new_filename <- paste(c(group_name, "cluster_stats.txt"), collapse = "_")
    new_path <- file.path(outdir, new_filename)
    tmp_df <- read.csv(result)

    used_clr <- as.character(unique(tmp_df$label))
    # Decide on the set difference itself rather than on a length comparison:
    # with duplicated definitions the lengths differ although nothing is
    # missing (an empty `unused` would make data.frame() fail), and with
    # equal lengths a missing definition would go unreported.
    unused <- setdiff(clusters, used_clr)
    if (length(unused) > 0) {
      unused_parts[[length(unused_parts) + 1]] <-
        data.frame(cluster_label = unused, not_found_in = group_name)
    }

    colnames(tmp_df)[[1]] <- "group_name"
    write.table(tmp_df, new_path, quote = FALSE, row.names = FALSE,
                col.names = TRUE, sep = "\t")
  }

  if (length(unused_parts) == 0) {
    cat("All provided cluster definition were found in all provided FCS files.",
        file = uc)
  } else {
    unused_clrs <- do.call(rbind, unused_parts)
    write.table(unused_clrs, uc, quote = FALSE, row.names = FALSE,
                col.names = TRUE, sep = "\t")
  }
  invisible(NULL)
}
109
110
# Validate the FCS files and the sample summary, then run the cluster search.
#
# Arguments:
#   report         path of the tab-separated sample summary (with header).
#   outdir         output directory, forwarded to search_cluster_panels().
#   list_unused    path of the "unused clusters" report, forwarded as well.
#   list_clusters  path the cluster definitions are written to, one per line.
#   fcs_files      list of FCS file paths.
#   grp_names      list of group names, in the same order as `fcs_files`.
#   clusters       character vector of cluster definitions.
#
# Terminates the session through quit() with one of these statuses:
#   10  at least one file could not be read by read.FCS()
#   11  a readable FCS file has no "sample_id" column
#   12  the summed per-file maxima of "sample_id" differ from the number of
#       rows in the summary
#   13  the summary lacks an "antibodies" or a "study_id" column
check_input <- function(report = "", outdir = "", list_unused = "",
                        list_clusters = "", fcs_files = list(),
                        grp_names = list(), clusters = vector()) {
  # check FCS files
  fcspaths <- unlist(fcs_files)
  fcsnames <- unlist(grp_names)
  ct_files <- 0
  some_pb <- FALSE
  for (i in seq_along(fcspaths)) {
    fcs <- tryCatch(
      read.FCS(fcspaths[[i]], transformation = FALSE),
      error = function(ex) {
        # conditionMessage() yields the message text only; pasting the
        # condition object itself also prints its deparsed call.
        print(paste("File is not a valid FCS file:", fcsnames[[i]],
                    conditionMessage(ex)))
        NULL
      }
    )
    if (is.null(fcs)) {
      some_pb <- TRUE
      next
    }
    if (!("sample_id" %in% colnames(fcs))) {
      quit(save = "no", status = 11, runLast = FALSE)
    }
    # Read the column whose presence was just checked, by name, instead of
    # assuming that it is the last column of the expression matrix.
    ct_files <- ct_files + max(fcs@exprs[, "sample_id"])
  }

  # Report unreadable files before the count comparison below: an unreadable
  # file adds nothing to ct_files, so the comparison would otherwise fail
  # first and hide the real cause behind status 12.
  if (some_pb) {
    quit(save = "no", status = 10, runLast = FALSE)
  }

  # check summary file format
  df <- read.table(report, sep = "\t", header = TRUE, colClasses = "character")
  if (!all(c("antibodies", "study_id") %in% colnames(df))) {
    quit(save = "no", status = 13, runLast = FALSE)
  }
  # check that summary index compatible with FCSs in collection - by number
  # of files == index nb
  if (ct_files != nrow(df)) {
    quit(save = "no", status = 12, runLast = FALSE)
  }

  write.table(clusters, list_clusters, quote = FALSE, row.names = FALSE,
              col.names = FALSE)
  search_cluster_panels(df, fcspaths, fcsnames, outdir, list_unused, clusters)
}
160
161 ################################################################################
162 ################################################################################
args <- commandArgs(trailingOnly = TRUE)

# Argument layout, as consumed below:
#   args[1]  sample summary report     args[2]  output directory
#   args[3]  first cluster definition  args[4]  unused-clusters report
#   args[5]  cluster list file         args[6..] further cluster definitions
#   then an argument matching "FCS_FILES", followed by alternating
#   <FCS file path> <group name> pairs.
marker_hits <- grep("FCS_FILES", args)
if (length(marker_hits) == 0) {
  stop("no 'FCS_FILES' marker found in the command-line arguments",
       call. = FALSE)
}
# A later argument (e.g. a file path) may also match; the marker comes first.
i <- marker_hits[1]

cl_df <- args[3]
if (i > 6) {
  cl_df <- c(cl_df, args[6:(i - 1)])
}
cluster_def <- vapply(cl_df, check_cluster_def, character(1))

# Everything after the marker; empty when the marker is the last argument
# (a plain (i + 1):length(args) would count backwards in that case).
tmp_fcs <- if (i < length(args)) args[(i + 1):length(args)] else character(0)
if (length(tmp_fcs) == 0) {
  stop("no FCS files were provided after the 'FCS_FILES' marker",
       call. = FALSE)
}
if (length(tmp_fcs) %% 2 != 0) {
  stop("FCS files must be given as <path> <name> pairs; got an odd number ",
       "of arguments after the 'FCS_FILES' marker", call. = FALSE)
}

# Odd positions hold the file paths, even positions the matching names.
path_idx <- seq_len(length(tmp_fcs) / 2) * 2 - 1
fcs_files <- as.list(tmp_fcs[path_idx])
fcs_names <- as.list(tmp_fcs[path_idx + 1])

check_input(args[1], args[2], args[4], args[5], fcs_files, fcs_names,
            cluster_def)