Mercurial > repos > azomics > flowsom_compare
view FlowSOMCompare.R @ 0:bd35f3b66a1e draft
"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flowsom_compare commit a1755b91905a2a95ebb0d6dd4a2b3d42c7e19f05"
author | azomics |
---|---|
date | Mon, 22 Jun 2020 17:54:03 -0400 |
parents | |
children | 33b8673272cf |
line wrap: on
line source
#!/usr/bin/Rscript # Module for Galaxy # Compares groups of FCS to FlowSOM reference tree # with FlowSOM ###################################################################### # Copyright (c) 2017 Northrop Grumman. # All rights reserved. ###################################################################### # # Version 1 # Cristel Thomas # # library(FlowSOM) library(flowCore) checkFiles <- function(groups){ all_files <- unlist(groups) all_unique <- unique(all_files) if (length(all_unique) != length(all_files)) { quit(save = "no", status = 14, runLast = FALSE) } } compareLists <- function(m1, m2){ listCheck <- T if (is.na(all(m1==m2))) { mm1 <- is.na(m1) mm2 <- is.na(m2) if (all(mm1==mm2)){ if (!all(m1==m2, na.rm=TRUE)){ listCheck <- F } } else { listCheck <- F } } else if (!all(m1==m2)) { listCheck <- F } return(listCheck) } prettyMarkerNames <- function(flowFrame){ n <- flowFrame@parameters@data[, "name"] d <- flowFrame@parameters@data[, "desc"] d[is.na(d)] <- n[is.na(d)] prettyNames <- list() if(any(grepl("#",d))){ # Support for hashtag notation: # antibody#fluorochrome -> antibody (fluorochrome) prettyNames <- gsub("#(.*)$"," (\\1)",d) } else { prettyNames <- paste(d, " <", n, ">", sep="") } return(prettyNames) } compareToTree <- function(fst, wilc_thresh=0.05, output="", plot="", stats, comp_groups, filenames) { groupRes <- CountGroups(fst, groups=comp_groups, plot=FALSE) pdf(plot, useDingbats=FALSE, onefile=TRUE) tresh <- wilc_thresh pg <- PlotGroups(fst, groupRes, p_tresh=tresh) dev.off() nb_nodes <- length(pg[[1]]) nb_comp <- length(pg) m <- matrix(0, nrow=nb_nodes, ncol=nb_comp+1) s <- seq_len(nb_nodes) m[,1]<- as.character(s) for (i in 1:nb_comp){ m[s,i+1]<- as.character(pg[[i]]) } groupnames <- attr(comp_groups,"names") out_colnames <- paste(groupnames, collapse="-") colnames(m) <- c("Node",out_colnames) write.table(m, file=output, quote=F, row.names=F, col.names=T, sep='\t', append=F) ## get filenames filepaths <- unlist(comp_groups) fnames <- unlist(filenames) nb_files <- length(filepaths) comp_files <- list() for (i in 1:length(filepaths)){ comp_files[[filepaths[[i]]]] <- fnames[[i]] } group_list <- list() for (grp in attr(comp_groups, "names")) { for (f in comp_groups[[grp]]){ group_list[[f]] <- grp } } out_stats <- attr(stats, "names") if ("counts" %in% out_stats){ gp_counts <- as.matrix(groupRes$counts) tpc <- matrix("", nrow=nb_files, ncol=2) tpc[,1] <- as.character(lapply(rownames(gp_counts), function(x) comp_files[[x]])) tpc[,2] <- as.character(lapply(rownames(gp_counts), function(x) group_list[[x]])) gp_counts <- cbind(tpc, gp_counts) colnames(gp_counts)[[1]] <- "Filename" colnames(gp_counts)[[2]] <- "Group" t_gp_counts <- t(gp_counts) write.table(t_gp_counts, file=stats[["counts"]], quote=F, row.names=T, col.names=F, sep='\t', append=F) } if ("pctgs" %in% out_stats){ gp_prop <- as.matrix(groupRes$pctgs) tpp <- matrix("", nrow=nb_files, ncol=2) tpp[,1] <- as.character(lapply(rownames(gp_prop), function(x) comp_files[[x]])) tpp[,2] <- as.character(lapply(rownames(gp_prop), function(x) group_list[[x]])) gp_prop <- cbind(tpp, gp_prop) colnames(gp_prop)[[1]] <- "Filename" colnames(gp_prop)[[2]] <- "Group" t_gp_prop <- t(gp_prop) write.table(t_gp_prop, file=stats[["pctgs"]], quote=F, row.names=T, col.names=F, sep='\t', append=F) } if ("means" %in% out_stats){ gp_mean <- as.matrix(groupRes$means) t_gp_mean <- t(gp_mean) tpm <- matrix(0, nrow=nb_nodes, ncol=1) tpm[,1] <- seq_len(nb_nodes) t_gp_mean <- cbind(tpm, t_gp_mean) colnames(t_gp_mean)[[1]] <- "Nodes" write.table(t_gp_mean, file=stats[["means"]], quote=F, row.names=F, col.names=T, sep='\t', append=F) } if ("medians" %in% out_stats){ gp_med <- as.matrix(groupRes$medians) t_gp_med <- t(gp_med) tpd <- matrix(0, nrow=nb_nodes, ncol=1) tpd[,1] <- seq_len(nb_nodes) t_gp_med <- cbind(tpd, t_gp_med) colnames(t_gp_med)[[1]] <- "Nodes" write.table(t_gp_med, file=stats[["medians"]], quote=F, row.names=F, col.names=T, sep='\t', append=F) } } checkFCS <- function(tree, output="", plot="", thresh = 0.05, stats, groups, filenames) { fcsfiles <- unlist(groups) tree_valid <- F markerCheck <- T tryCatch({ fsomtree <- readRDS(tree) tree_valid <- T }, error = function(ex) { print(paste(ex)) }) fst <- if (length(fsomtree)==2) fsomtree[[1]] else fsomtree if (tree_valid){ tree_markers <- as.vector(fst$prettyColnames) tree_channels <- as.vector(colnames(fst$data)) if (length(tree_markers) < 1){ quit(save = "no", status = 11, runLast = FALSE) } } else { quit(save = "no", status = 11, runLast = FALSE) } for (i in 1:length(fcsfiles)){ is_file_valid <- F tryCatch({ fcs <- read.FCS(fcsfiles[i], transformation=FALSE) is_file_valid <- T }, error = function(ex) { print(paste(ex)) }) if (i == 1) { m1 <- as.vector(pData(parameters(fcs))$desc) c1 <- colnames(fcs) # compare to tree markers pm <- prettyMarkerNames(fcs) if (!all(tree_markers %in% pm)){ quit(save = "no", status = 13, runLast = FALSE) } } else { m2 <- as.vector(pData(parameters(fcs))$desc) c2 <- colnames(fcs) markerCheck <- compareLists(m1,m2) markerChannel <- compareLists(c1,c2) } } if (markerCheck && markerChannel) { compareToTree(fst, thresh, output, plot, stats, groups, filenames) } else { quit(save = "no", status = 12, runLast = FALSE) } } args <- commandArgs(trailingOnly = TRUE) first_g1 <- 5 tot_args <- length(args) g <- list() tmplist <- c("counts", "means", "medians", "pctgs") for (i in 5:13){ if (args[i] %in% tmplist){ first_g1 <- first_g1 + 2 g[[args[i]]] <- args[i+1] } } tmpargs <- paste(args[first_g1:tot_args], collapse="=%=") tmpgroups <- strsplit(tmpargs, "=%=DONE=%=") groups <- list() filenames <- list() for (gps in tmpgroups[[1]]) { tmpgroup <- strsplit(gps, "=%=") nb_files <- (length(tmpgroup[[1]]) - 1 ) /2 tmplist <- character(nb_files) tmpnames <- character(nb_files) j <- 1 for (i in 2:length(tmpgroup[[1]])){ if (!i%%2){ tmplist[[j]] <- tmpgroup[[1]][i] tmpnames[[j]]<- tmpgroup[[1]][i+1] j <- j + 1 } } groups[[tmpgroup[[1]][1]]] <- tmplist filenames[[tmpgroup[[1]][1]]] <- tmpnames } checkFiles(groups) checkFCS(args[1], args[2], args[3], args[4], g, groups, filenames)