diff FlowSOMMApIndividualFCS.R @ 0:e796ed5dfd02 draft

"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flowsom_cross_comp commit 2f04d23cb47d9b233d159c54ff406e56c87746e0"
author azomics
date Tue, 23 Jun 2020 19:49:33 -0400
parents
children a1054bd1060a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/FlowSOMMApIndividualFCS.R	Tue Jun 23 19:49:33 2020 -0400
@@ -0,0 +1,277 @@
+#!/usr/bin/Rscript
+# Module for Galaxy
+# Generates FlowSOM reference tree
+# with FlowSOM AggregateFlowFrames
+######################################################################
+#                  Copyright (c) 2017 Northrop Grumman.
+#                          All rights reserved.
+######################################################################
+#
+# Version 1
+# Cristel Thomas
+#
+#
+library(FlowSOM)
+library(flowCore)
+
+## geometric mean from
+# https://stackoverflow.com/questions/2602583/geometric-mean-is-there-a-built-in
+gm_mean = function(x, na.rm=TRUE){
+  exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
+}
+
+prettyMarkerNames <- function(flowFrame){
+  n <- flowFrame@parameters@data[, "name"]
+  d <- flowFrame@parameters@data[, "desc"]
+  d[is.na(d)] <- n[is.na(d)]
+  prettyNames <-list()
+  if(any(grepl("#",d))){
+      # Support for hashtag notation:
+      # antibody#fluorochrome -> antibody (fluorochrome)
+      prettyNames <- gsub("#(.*)$"," (\\1)",d)
+  } else {
+      prettyNames <- paste(d, " <", n, ">", sep="")
+  }
+  return(prettyNames)
+}
+
+checkMarkers <- function(fcsfiles, flag_ff=FALSE){
+  markerCheck <- T
+  for (i in 1:length(fcsfiles)){
+    if (flag_ff){
+      fcs <- readRDS(fcsfiles[i])
+    } else {
+      fcs <- read.FCS(fcsfiles[i], transformation=FALSE)
+    }
+    if (i == 1) {
+      m1 <- as.vector(pData(parameters(fcs))$desc)
+    } else {
+      m2 <- as.vector(pData(parameters(fcs))$desc)
+      if (is.na(all(m1==m2))) {
+        mm1 <- is.na(m1)
+        mm2 <- is.na(m2)
+        if (all(mm1==mm2)){
+          if (!all(m1==m2, na.rm=TRUE)){
+            markerCheck <- F
+          }
+        } else {
+          markerCheck <- F
+        }
+      } else if (!all(m1==m2)) {
+        markerCheck <- F
+      }
+    }
+  }
+  if (!markerCheck) {
+    quit(save = "no", status = 13, runLast = FALSE)
+  }
+}
+
+mapToTree <- function(filenames, filepaths, flag_ff=FALSE, reftree, cluster=10,
+                      outdir="",flag_meta=FALSE, mfi='mfi', stat1="", stat2="",
+                      stat3="", plot="", mplot="") {
+
+  checkMarkers(filepaths, flag_ff)
+
+  # get tree
+  fst <- readRDS(reftree)
+  plots <- FALSE
+  mplots <- FALSE
+  dir.create(outdir)
+  if (!plot==""){
+    dir.create(plot)
+    plots <- TRUE
+  }
+  if (!mplot==""){
+    dir.create(mplot)
+    mplots <- TRUE
+  }
+  metaC <- metaClustering_consensus(fst$map$codes, k=cluster, seed=33)
+  nb_pop <- if (flag_meta) cluster else max(fst$map$mapping[,1])
+  nb_samples <- length(filepaths)
+  nb_marker <- length(fst$prettyColnames)
+  print_markers <- gsub(' <.*>','',fst$prettyColnames)
+  print_markers_ff <- append(print_markers, "Population")
+
+  m_stat1 <- matrix(0, nrow=nb_samples, ncol=(nb_pop+2))
+  colnames(m_stat1) <- c("FileID", "SampleName", seq_len(nb_pop))
+
+  sink(stat2)
+  cat(print_markers, sep="\t")
+  cat("\tPercentage\tPopulation\tSampleName\n")
+  sink()
+
+  col_m3 <- c("Population")
+  for (m in print_markers){
+    m1 <- paste(m, "mean", sep="_")
+    m2 <- paste(m, "median", sep="_")
+    m3 <- paste(m, "stdev", sep="_")
+    col_m3 <- append(col_m3, c(m1,m2,m3))
+  }
+  col_stat3 <- c(col_m3, "Percentage_mean","Percentage_median","Percentage_stdev")
+
+  for (i in 1:length(filepaths)){
+    if (flag_ff){
+      ff <- readRDS(filepaths[i])
+    } else {
+      ff <- read.FCS(filepaths[i], transformation=FALSE)
+    }
+    if (i==1) {
+      # compare to tree markers
+      pm <- prettyMarkerNames(ff)
+      if (!all(fst$prettyColnames %in% pm)){
+        quit(save = "no", status = 14, runLast = FALSE)
+      }
+    }
+
+    fsom <- NewData(fst, ff)
+
+    if (mplots){
+      markers <- colnames(ff)
+      tmpmplot <- paste(filenames[i], "marker_plots.pdf", sep="_")
+      pdf(file.path(mplot,tmpmplot), useDingbats=FALSE, onefile=TRUE)
+      for (marker in markers){
+        PlotMarker(fsom, marker)
+      }
+      dev.off()
+    }
+
+    if (!plot==""){
+      plotpath <- paste(filenames[i], "tree.png", sep="_")
+      png(file.path(plot, plotpath), type="cairo", height=800, width=800)
+      PlotStars(fsom, backgroundValues = as.factor(metaC))
+      dev.off()
+    }
+
+    m <- matrix(0,nrow=nrow(ff),ncol=1)
+    s <- seq_len(nrow(ff))
+    if (flag_meta){
+      m[s,] <- metaC[fsom$map$mapping[,1]]
+    } else {
+      m[s,] <- fsom$map$mapping[,1]
+    }
+    colnames(m) <- "FlowSOM"
+    ff <- cbind2(ff,m)
+    out <- exprs(ff)
+    colnames(out) <- print_markers_ff
+
+    clr_table <- paste(filenames[i], "clustered.flowclr", sep="_")
+    write.table(out, file=file.path(outdir, clr_table), quote=F, row.names=F, col.names=T, sep='\t',
+                append=F)
+
+    cluster_count <- table(out[,"Population"])
+    cluster_prop <- prop.table(cluster_count)*100
+    m1_tmp <- numeric(nb_pop)
+    for (j in 1:nb_pop){
+      if (as.character(j) %in% names(cluster_count)){
+        m1_tmp[[j]] <- format(round(cluster_prop[[as.character(j)]], 2), nsmall=2)
+      }
+    }
+    samplename <- paste("Sample", i, sep="")
+    m_stat1[i,] <- c(filenames[i], samplename, m1_tmp)
+    # flowstat2
+    # Marker1 Marker2 Marker3 ... Population Percentage SampleName
+    # MFIs for each marker
+    # dimension ==> col = nb of markers + 3; row = nb of files * nb of clusters
+    if (mfi=="mfi"){
+      m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), mean)
+    } else if (mfi=="mdfi") {
+      m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), median)
+    } else {
+      m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), gm_mean)
+    }
+
+    m2["Percentage"] <- as.character(cluster_prop)
+    m2["Population"] <- as.character(m2$Group.1)
+    m2["SampleName"] <- samplename
+    m2t <- as.matrix(m2[2:length(m2)])
+    write.table(m2t, file=stat2, quote=F, row.names=F, col.names=F, sep='\t',
+                append=T)
+
+  }
+  write.table(m_stat1, file=stat1, quote=F, row.names=F, col.names=T, sep='\t',
+              append=F)
+
+  m2df <- read.table(stat2, sep="\t", header=T)
+  ag <- aggregate(m2df[,0:nb_marker+1], list(m2df[,nb_marker+2]), function(x) c(mn=mean(x), med=median(x), stdv=sd(x)))
+  m3t <- as.matrix(ag)
+  colnames(m3t) <- col_stat3
+  write.table(m3t, file=stat3, quote=F, row.names=F, col.names=T, sep='\t',
+              append=F)
+}
+
+flowFrameOrFCS <- function(filenames, filepaths, reftree, cluster=10, outdir="",
+                           flag_meta=FALSE, mfi='mfi', stat1="", stat2="",
+                           stat3="", plot="", mplot=""){
+
+  isValid <- F
+  is_fcs <- F
+  is_ff <- F
+  flag_ff <- FALSE
+  i <- 0
+  for (f in filepaths){
+    tryCatch({
+      is_fcs <- isFCSfile(f)
+    }, error = function(ex) {
+      print(paste(ex))
+    })
+    if (!is_fcs){
+      tryCatch({
+        ff <- readRDS(f)
+        is_ff <- T
+      }, error = function(ex) {
+        print(paste(ex))
+      })
+    } else {
+      i <- i + 1
+    }
+    if (!is_ff && !is_fcs) {
+      quit(save = "no", status = 10, runLast = FALSE)
+    }
+  }
+  if (i==0) {
+    flag_ff <- TRUE
+  } else if (!i==length(filenames)){
+    quit(save = "no", status = 12, runLast = FALSE)
+  }
+  mapToTree(filenames, filepaths, flag_ff, reftree, cluster, outdir, flag_meta,
+            mfi, stat1, stat2, stat3, plot, mplot)
+}
+
+args <- commandArgs(trailingOnly = TRUE)
+plot <- ""
+mplot <- ""
+m <- 8
+flag_meta <- FALSE
+if (args[4]=='meta') {
+  flag_meta <- TRUE
+}
+if (args[9] == 'newDataTrees') {
+  plot <- 'newDataTrees'
+  m <- m + 1
+  if (args[10] == 'newDataMarkers') {
+    mplot <- "newDataMarkers"
+    m <- m + 1
+  }
+} else if (args[9] == 'newDataMarkers') {
+  mplot <- "newDataMarkers"
+  m <- m + 1
+}
+
+n <- m+1
+# files + filenames
+nb_files <- (length(args) - m) / 2
+files1 <- character(nb_files)
+files2 <- character(nb_files)
+j <- 1
+file_list <- args[n:length(args)]
+for (i in 1:length(file_list)) {
+  if (i%%2){
+    files1[[j]] <- file_list[i]
+    files2[[j]] <- file_list[i+1]
+    j <- j + 1
+  }
+}
+
+flowFrameOrFCS(files2, files1, args[1], as.numeric(args[3]), args[2], flag_meta,
+               args[5], args[6], args[7], args[8], plot, mplot)