diff pipe-t.R @ 0:185ba61836ab draft
planemo upload for repository https://github.com/igg-molecular-biology-lab/pipe-t.git commit 04049039da97e1c9a8048e732afca48f2741cadf
author   | davidecangelosi
date     | Thu, 02 May 2019 04:49:04 -0400
parents  |
children | 6cd22b1fbf6d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pipe-t.R	Thu May 02 04:49:04 2019 -0400
@@ -0,0 +1,699 @@

cat("\n Started! \n")
#sessionInfo()


# Send R errors to stderr
options(show.error.messages = F, error = function(){cat(geterrmessage(), file = stderr()); q("no", 1, F)})

# Avoid crashing Galaxy with an UTF8 error on German LC settings
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
suppressPackageStartupMessages({
  library(HTqPCR)
  library(base)
  library(Biobase)
  library(utils)
  library(stats)
  library(graphics)
  library(grDevices)
  library(RColorBrewer)
  library(limma)
  library(RankProd)
  library(methods)
  library(impute)
  library(BBmisc)
  library(affy)
  library(psych)
  #library(gmp)
  library(zoo)
  library(nondetects)
  library(Hmisc)
  #library(missForest)
  #library(mice)
})

cat("\n R libraries...loaded!\n")

# Positional command-line arguments. Arguments 1-10 are fixed; the meaning of
# args[11] onward depends on the chosen normalization, imputation and DEA
# methods (see the branches below).
args <- commandArgs(trailingOnly=TRUE)
dpfiles <- basename(args[1])
path000 <- dirname(args[1])
format <- args[2]
nfeatures <- args[3]
rawout <- args[4]
path <- args[5]
dcCtmin <- args[6]
dcCtmax <- args[7]
dcflag <- args[8]
x <- args[9]
normalizationMethod <- args[10]
if (normalizationMethod=="deltaCt") {
  normalizers <- args[11]
  outputNorm <- args[12]
  outputECDF <- args[13]
  percentofnastoremove <- args[14]
  outputRemaining <- args[15]
  imputeMethod <- args[16]
  if (imputeMethod=="knn") {
    kappa <- args[17]
    macsp <- args[18]
    outputIMP <- args[19]
    DEAMethod <- args[20]
    if (DEAMethod=="ttest") {
      alternative <- args[21]
      paired <- args[22]
      replicates <- args[23]
      sort <- args[24]
      stringent <- args[25]
      padjust <- args[26]
      outputDEA <- args[27]
      filtnames <- args[28]
    } else {
      outputDEA <- args[21]
      filtnames <- args[22]
    }
  } else {
    # mean, median, nondetects, cubic
    outputIMP <- args[17]
    DEAMethod <- args[18]
    if (DEAMethod=="ttest") {
      alternative <- args[19]
      paired <- args[20]
      replicates <- args[21]
      sort <- args[22]
      stringent <- args[23]
      padjust <- args[24]
      outputDEA <- args[25]
      filtnames <- args[26]
    } else {
      outputDEA <- args[19]
      filtnames <- args[20]
    }
  }
} else {
  outputNorm <- args[11]
  outputECDF <- args[12]
  percentofnastoremove <- args[13]
  outputRemaining <- args[14]
  imputeMethod <- args[15]
  if (imputeMethod=="knn") {
    kappa <- args[16]
    macsp <- args[17]
    outputIMP <- args[18]
    DEAMethod <- args[19]
    if (DEAMethod=="ttest") {
      alternative <- args[20]
      paired <- args[21]
      replicates <- args[22]
      sort <- args[23]
      stringent <- args[24]
      padjust <- args[25]
      outputDEA <- args[26]
      filtnames <- args[27]
    } else {
      outputDEA <- args[20]
      filtnames <- args[21]
    }
  } else {
    # mean, median, nondetects, cubic
    outputIMP <- args[16]
    DEAMethod <- args[17]
    if (DEAMethod=="ttest") {
      alternative <- args[18]
      paired <- args[19]
      replicates <- args[20]
      sort <- args[21]
      stringent <- args[22]
      padjust <- args[23]
      outputDEA <- args[24]
      filtnames <- args[25]
    } else {
      outputDEA <- args[18]
      filtnames <- args[19]
    }
  }
}
cat("\n Initialization completed! \n")

# Custom reader derived from HTqPCR's readCtData, extended with an "EDS" format
readCtDataDav <-
function (files, path = NULL, n.features = 384, format = "plain",
    column.info, flag, feature, type, position, Ct, header = FALSE,
    SDS = FALSE, n.data = 1, samples, na.value = 40, sample.info,
    ...)
{
    if (missing(files))
        stop("No input files specified")
    if (length(files) != length(n.data) & length(n.data) != 1)
        stop("n.data must either be a single integer, or same length as number of files")
    if (length(n.data) == 1)
        n.data <- rep(n.data, length(files))
    nsamples <- sum(n.data)
    ncum <- cumsum(n.data)
    s.names <- NULL
    nspots <- n.features
    if (SDS) {
        warning("Please use format='SDS'. The 'SDS' parameter is retained for backward compatibility only.")
        format <- "SDS"
    }
    if (!missing(flag) | !missing(feature) | !missing(type) |
        !missing(position) | !missing(Ct)) {
        warning("Please use 'column.info' for providing a list of column numbers containing particular information. The use of 'flag', 'feature', 'type', 'position' and 'Ct' is deprecated and will be removed in future versions.")
    }
    if (missing(column.info)) {
        column.info <- switch(format,
            EDS = list(flag = "EXPFAIL", feature = "Target.Name", position = "Well.Position", Ct = "CT"),
            plain = list(flag = 4, feature = 6, type = 7, position = 3, Ct = 8),
            SDS = list(flag = 4, feature = 6, type = 7, position = 3, Ct = 8),
            LightCycler = list(feature = "Name", position = "Pos", Ct = "Cp"),
            CFX = list(feature = "Content", position = "Well", Ct = "Cq.Mean"),
            OpenArray = list(flag = "ThroughHole.Outlier", feature = "Assay.Assay.ID", type = "Assay.Assay.Type", position = "ThroughHole.Address", Ct = "ThroughHole.Ct"),
            BioMark = list(flag = "Call", feature = "Name.1", position = "ID", Ct = "Value"))
    }
    X <- matrix(0, nspots, nsamples)
    X.flags <- as.data.frame(X)
    X.cat <- data.frame(matrix("OK", ncol = nsamples, nrow = nspots), stringsAsFactors = FALSE)
    for (i in seq_along(files)) {
        if (i == 1) {
            cols <- 1:ncum[i]
        }
        else {
            cols <- (ncum[i - 1] + 1):ncum[i]
        }
        readfile <- ifelse(is.null(path), files[i], file.path(path, files[i]))
        sample <- switch(format,
            EDS = .readCtEDS(readfile = readfile, n.data = n.data, i = i, nspots = nspots, ...),
            plain = .readCtPlain(readfile = readfile, header = header, n.features = n.features, n.data = n.data, i = i, ...),
            SDS = .readCtSDS(readfile = readfile, n.data = n.data, i = i, nspots = nspots, ...),
            LightCycler = .readCtLightCycler(readfile = readfile, n.data = n.data, i = i, nspots = nspots, ...),
            CFX = .readCtCFX(readfile = readfile, n.data = n.data, i = i, nspots = nspots, ...),
            OpenArray = .readCtOpenArray(readfile = readfile, n.data = n.data, i = i, nspots = nspots, ...),
            BioMark = .readCtBioMark(readfile = readfile, n.data = n.data, i = i, nspots = nspots, ...))
        data <- matrix(sample[, column.info[["Ct"]]], ncol = n.data[i])
        undeter <- apply(data, 2, function(x) x %in% c("Undetermined", "No Ct"))
        X.cat[, cols][undeter] <- "Undetermined"
        nas <- c("Undetermined", "No Ct", "999", "N/A")
        if (is.null(na.value)) {
            data[data %in% nas | data == 0] <- NA
        }
        else {
            data[data %in% nas | is.na(data) | data == 0] <- na.value
        }
        X[, cols] <- apply(data, 2, function(x) as.numeric(as.character(x)))
        if ("flag" %in% names(column.info)) {
            flags <- matrix(sample[, column.info[["flag"]]], ncol = n.data[i])
            flags[flags == "-"] <- "Failed"
            flags[flags == "+"] <- "Passed"
            X.flags[, cols] <- flags
        }
        else {
            X.flags[, cols] <- "Passed"
        }
        if (format == "OpenArray") {
            s.names <- c(s.names, unique(sample$SampleInfo.SampleID))
        }
        else if (format %in% c("EDS", "plain", "SDS")) {
            s.names <- c(s.names, unique(sample[, 2]))
        }
        else {
            s.names <- s.names
        }
        if (i == 1) {
            featPos <- paste("feature", 1:nspots, sep = "")
            if ("position" %in% names(column.info))
                featPos <- as.character(sample[1:nspots, column.info[["position"]]])
            featType <- factor(rep("Target", nspots))
            if ("type" %in% names(column.info))
                featType <- sample[1:nspots, column.info[["type"]]]
            featName <- paste("feature", 1:nspots, sep = "")
            if ("feature" %in% names(column.info))
                featName <- as.character(sample[1:nspots, column.info[["feature"]]])
            df <- data.frame(featureNames = featName, featureType = as.factor(featType), featurePos = featPos)
            metaData <- data.frame(labelDescription = c("Name of the qPCR feature (gene)", "Type of feature", "Position on assay"))
            featData <- AnnotatedDataFrame(data = df, varMetadata = metaData)
        }
    }
    if (!missing(samples)) {
        if (length(samples) < nsamples) {
            warning("Not enough sample names provided; using Sample1, Sample2, ... instead\n")
            samples <- paste("Sample", 1:nsamples, sep = "")
        }
        else if (length(samples) == nsamples) {
            samples <- samples
        }
    }
    else if (missing(samples)) {
        if (length(files) == nsamples) {
            samples <- gsub("(.+)\\..+", "\\1", files)
        }
        else if (length(s.names) == nsamples) {
            samples <- s.names
        }
        else {
            samples <- paste("Sample", 1:nsamples, sep = "")
        }
    }
    samples <- make.unique(samples)
    if (any(is.na(X)))
        warning("One or more samples contain NAs. Consider replacing these with e.g. Ct=40 now.")
    if (missing(sample.info)) {
        pdata <- data.frame(sample = 1:length(samples), row.names = samples)
        sample.info <- new("AnnotatedDataFrame", data = pdata,
            varMetadata = data.frame(labelDescription = "Sample numbering", row.names = "Sample names"))
    }
    X.hist <- data.frame(history = capture.output(match.call(readCtData)), stringsAsFactors = FALSE)
    out <- new("qPCRset", exprs = X, phenoData = sample.info,
        featureData = featData, featureCategory = X.cat, flag = X.flags,
        CtHistory = X.hist)
    out
}

.readCtEDS <-
function(readfile=readfile, n.data=n.data, i=i, nspots=nspots, ...)
{
    # Scan through beginning of file, max 100 lines
    file.header <- readLines(con=readfile, n=100)
    n.header <- grep("^Well", file.header)
    if (length(n.header)==0)
        n.header <- 0
    # Read data, skip the required lines
    out <- read.delim(file=readfile, header=TRUE, colClasses="character", nrows=nspots*n.data[i], skip=n.header-1, strip.white=TRUE, ...)
    out
} # .readCtEDS

# Read the sample sheet (sampleName, Treatment) and load the Ct files into a qPCRset
head(read.delim(file.path(path000, dpfiles), sep="\t"))
files <- read.delim(file.path(path000, dpfiles), sep="\t")
switch(format,
    "EDS" = {
        columns <- list(flag="EXPFAIL", feature="Target.Name", position="Well.Position", Ct="CT")
        metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
        phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
        rownames(phenoData) <- as.vector(files$sampleName)
        raw <- readCtDataDav(files = as.vector(files$sampleName), header=TRUE, format="EDS", column.info=columns, path = path, sample.info=phenoData, n.features = as.numeric(nfeatures))
    },
    "plain" = {
        metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
        phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
        rownames(phenoData) <- as.vector(files$sampleName)
        raw <- readCtDataDav(files = as.vector(files$sampleName), header=FALSE, format="plain", path = path, sample.info=phenoData, n.features = as.numeric(nfeatures))
    },
    "SDS" = {
        columns <- list(feature=3, Ct=6, flag=11)
        metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
        phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
        rownames(phenoData) <- as.vector(files$sampleName)
        raw <- readCtDataDav(files = files$sampleName, format="SDS", column.info=columns, path = path, sample.info=phenoData, n.features=as.numeric(nfeatures))
    },
    "LightCycler" = {
        metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
        phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
        rownames(phenoData) <- as.vector(files$sampleName)
        raw <- readCtDataDav(files = files$sampleName, path = path, format = "LightCycler", sample.info=phenoData, n.features = as.numeric(nfeatures))
    },
    "CFX" = {
        metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
        phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
        rownames(phenoData) <- as.vector(files$sampleName)
        raw <- readCtDataDav(files = files$sampleName, path = path, format = "CFX", sample.info=phenoData, n.features = as.numeric(nfeatures))
    },
    "OpenArray" = {
        metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
        phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
        rownames(phenoData) <- as.vector(files$sampleName)
        raw <- readCtDataDav(files = files$sampleName, path = path, format = "OpenArray", sample.info=phenoData, n.features = as.numeric(nfeatures))
    },
    "BioMark" = {
        metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
        phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
        rownames(phenoData) <- as.vector(files$sampleName)
        raw <- readCtDataDav(files = files$sampleName, path = path, format = "BioMark", sample.info=phenoData, n.features = as.numeric(nfeatures))
    },
    stop("Enter something that switches me!")
)
cat("\n Files read correctly! ")
") + +write.table(exprs(raw), file=rawout, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t") +#################################################################################################################### +#Set a new categories for the values meeting two criterions +switch(format, + "EDS"={ + unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL) + }, + "plain"={ + unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Flagged", quantile=NULL) + }, + "SDS"={ + unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="TRUE", quantile=NULL) + }, + "LightCycler"={ + unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL) + }, + "CFX"={ + unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL) + }, + "OpenArray"={ + unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="TRUE ", quantile=NULL) + }, + "BioMark"={ + unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Fail", quantile=NULL) + }, + stop("Enter something that switches me!") +) +#unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL) + +#################################################################################################################### +#Filter out the values of the new category +xFilter <- filterCategory(unreliable) + +cat("\n Categorization completed! ") + +png(x, # create PNG for the heat map + width = 10*300, # 5 x 300 pixels + height = 10*300, + res = 300, # 300 pixels per inch + pointsize = 8) + plotCtBoxes(xFilter, stratify=NULL, xlab = "Samples", ylab="Ct", names=as.character(seq(1, ncol(xFilter), 1))) # smaller font size +dev.off() + +#write.table(exprs(xFilter), file=x, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t") + +#################################################################################################################### +#NORMALIZATION +#method version 3.5.1 + Global mean +normalizeCtDataDav <- +function(q, + norm = "deltaCt", + deltaCt.genes = NULL, + scale.rank.samples, + rank.type = "pseudo.median", + Ct.max = dcCtmax, + geo.mean.ref, + verbose = TRUE) +{ + # Extract the data + data <- exprs(q) + data.norm <- data + # Get the normalisation method + method <- match.arg(norm, c("quantile", "scale.rankinvariant", "norm.rankinvariant", "deltaCt", "geometric.mean", "globalmean")) + # Some general stuff that will be used by both rank.invariant methods + if (method %in% c("scale.rankinvariant", "norm.rankinvariant")) { + # Index to use for too high Ct values + #Ct.index <- data>Ct.max + data.Ctmax <- data + Ct.index <- is.na(data) + #data.Ctmax[Ct.index] <- NA + data<-na.spline(data) + # Define what to rank against + if (rank.type=="pseudo.median") { + ref.data <- apply(data.Ctmax, 1, median, na.rm=TRUE) + } else if (rank.type=="pseudo.mean") { + ref.data <- apply(data.Ctmax, 1, mean, na.rm=TRUE) + } + # Mark + replace NA values with something temporary + na.index <- is.na(ref.data) + ref.data[na.index] <- 30 + # Run the rank.invariant function + data.rankinvar <- apply(data, 2, normalize.invariantset, ref=ref.data) + } + # The actual normalisation + switch(method, + quantile = { + # Use an internal limma function + data.norm <- 
            data.norm <- normalizeQuantiles(data)
        },
        scale.rankinvariant = {
            # Get all the rank invariant genes
            ri.genes <- sapply(data.rankinvar, "[[", "i.set")
            # Remove those with too high Ct values
            ri.genes[Ct.index] <- FALSE
            # Remove those that were all NA for potentially other reasons
            ri.genes[na.index,] <- FALSE
            # Select those to use here
            ri.count <- rowSums(ri.genes)
            if (missing(scale.rank.samples))
                scale.rank.samples <- ncol(data)-1
            ri.index <- ri.count >= scale.rank.samples
            if (sum(ri.index)==0)
                stop(paste("No rank invariant genes were found across", scale.rank.samples, "samples"))
            # Extract the corresponding Ct values; average
            ri.mean <- colMeans(data[ri.index,,drop=FALSE])
            ri.scale <- ri.mean/ri.mean[1]
            # Correct the data
            data.norm <- t(t(data)*ri.scale)
            # Print info
            if (verbose) {
                cat(c("Scaling Ct values\n\tUsing rank invariant genes:", paste(featureNames(q)[ri.index], collapse=" "), "\n"))
                cat(c("\tScaling factors:", format(ri.scale, digits=3), "\n"))
            }
        },
        norm.rankinvariant = {
            # Print info
            if (verbose)
                cat("Normalizing Ct values\n\tUsing rank invariant genes:\n")
            # Correct the data based on the calculations above
            for (i in 1:ncol(data)) {
                # Check if there are any rank invariant genes
                ri.sub <- data.rankinvar[[i]]
                ri.genes <- ri.sub[["i.set"]]
                # Remove those that don't pass the Ct.max criteria
                ri.genes[Ct.index[,i]] <- FALSE
                # Remove those that are NA for other reasons
                ri.genes[na.index] <- FALSE
                if (sum(ri.genes)==0) {
                    warning(paste("\tNo rank invariant genes were found for sample ", sampleNames(q)[i], "; sample not normalized\n", sep=""))
                    next
                }
                # If verbose, print some info
                if (verbose)
                    cat(paste("\t", sampleNames(q)[i], ": ", sum(ri.genes), " rank invariant genes\n", sep=""))
                # The actual correction
                data.norm[,i] <- as.numeric(approx(ri.sub$n.curve$y, ri.sub$n.curve$x, xout=data[,i], rule=2)$y)
            }
        },
        deltaCt = {
            # Which are the reference genes (endogenous controls)
            if (is.null(deltaCt.genes))
                deltaCt.genes <- unique(featureNames(q)[featureType(q)=="Endogenous Control"])
            c.index <- featureNames(q) %in% deltaCt.genes
            if (verbose) {
                cat(c("Calculating deltaCt values\n\tUsing control gene(s):", paste(deltaCt.genes, collapse=" "), "\n"))
            }
            # Run through all cards; perform internal normalisation
            for (c in 1:ncol(data)) {
                # Calculate the control genes
                refCt <- mean(data[c.index,c], na.rm=TRUE)
                refsd <- sd(data[c.index,c], na.rm=TRUE)
                # Difference for target and controls
                data.norm[,c] <- data[,c]-refCt
                # Print results
                if (verbose)
                    cat(paste("\tCard ", c, ":\tMean=", format(refCt, dig=4), "\tStdev=", format(refsd, dig=3), "\n", sep=""))
            }
        },
        geometric.mean = {
            # For each column, calculate the geometric mean of Ct values <= Ct.max
            #geo.mean <- apply(data, 2, function(x) {
            #    xx <- log2(subset(x, x<Ct.max))
            #    2^mean(xx)})
            geo.mean <- apply(data, 2, function(x) {
                xx <- subset(x, x<=Ct.max)
                geometric.mean(xx)})
            # Which sample to scale to
            #if (missing(geo.mean.ref))
            #    geo.mean.ref <- 1
            # Calculate the scaling factor
            #geo.scale <- geo.mean/geo.mean[geo.mean.ref]
            # Adjust the data accordingly
            data.norm <- t(t(data) - geo.mean)
            if (verbose) {
                cat(c("Scaling Ct values\n\tUsing geometric mean within each sample\n"))
                #cat(c("\tScaling factors:", format(geo.scale, digits=3), "\n"))
            }
        },
        globalmean = {
            # Subtract the per-sample global mean of Ct values <= Ct.max
            glo <- apply(data, 2, function(x) {
                xx <- subset(x, x <= Ct.max)
                mean(xx)
            })
            data.norm <- t(t(data) - glo)
        }
    )
    # Replace with the normalised Ct exprs
    exprs(q) <- data.norm
    # Add to the history of the object
    if (nrow(getCtHistory(q))==0)
        setCtHistory(q) <- data.frame(history="Manually created qPCRset object.", stringsAsFactors=FALSE)
    setCtHistory(q) <- rbind(getCtHistory(q), capture.output(match.call(normalizeCtData)))
    # Return the normalised object
    q
}


switch(normalizationMethod,
    "deltaCt" = {
        normalizedDataset <- normalizeCtDataDav(xFilter, norm="deltaCt", deltaCt.genes = explode(normalizers, sep = ","))
    },
    "quantile" = {
        normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
    },
    "scale.rankinvariant" = {
        normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
    },
    "norm.rankinvariant" = {
        normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
    },
    "geometric.mean" = {
        normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
    },
    "globalmean" = {
        normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
    },
    stop("Enter something that switches me!")
)

#if (normalizationMethod=="deltaCt") {
#normalize CT data
#normalizedDataset <- normalizeCtDataDav(xFilter, norm="deltaCt", deltaCt.genes =explode(normalizers, sep = ","))
#} else {
#normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
#}
cat("\n Data normalized correctly! \n")
write.table(exprs(normalizedDataset), file=outputNorm, quote=FALSE, row.names=TRUE, col.names=TRUE, sep = "\t")


#normalizedDataset
####################################################################################################################
# Check noise reduction by empirical cumulative distribution

#X = rnorm(100)   # X is a sample of 100 normally distributed random variables
#P = ecdf(X)      # P is a function giving the empirical CDF of X
#Y = rnorm(1000)  # Y is a sample of 1000 normally distributed random variables
#PY = ecdf(Y)
#plot(P)
#lines(PY)
png(outputECDF,     # create PNG for the ECDF comparison plot
    width = 10*300, # 10 x 300 pixels
    height = 10*300,
    res = 300,      # 300 pixels per inch
    pointsize = 8)  # smaller font size
# Coefficient of variation (%) per feature, computed on linear-scale values (2^-Ct)
vec <- c()
for (i in 1:nrow(exprs(xFilter))){
    CVX <- (sd(2^-exprs(xFilter)[i,], na.rm = TRUE)/mean(2^-exprs(xFilter)[i,], na.rm = TRUE))*100
    vec <- c(vec, CVX)
}
vec <- na.omit(vec)
P <- ecdf(vec)
gm <- c()
for (i in 1:nrow(exprs(normalizedDataset))){
    CVGM <- (sd(2^-exprs(normalizedDataset)[i,], na.rm = TRUE)/mean(2^-exprs(normalizedDataset)[i,], na.rm = TRUE))*100
    gm <- c(gm, CVGM)
}
gm <- na.omit(gm)

PY <- ecdf(gm)

plot_colors <- c(rgb(r=0.0,g=0.0,b=0.9), "red", "forestgreen", rgb(r=0.0,g=0.0,b=0.0), rgb(r=0.5,g=0.0,b=0.3), rgb(r=0.0,g=0.4,b=0.4))

plot(P, col=plot_colors[1], xlim=c(0.0,600), ylim=c(0.0,1), xaxp = c(0.0, 600, 6), yaxp = c(0.0, 1, 10), cex=1.3, lwd=5, main=NULL, xlab="CV(%)", ylab="Empirical Cumulative Distribution")
lines(PY, lwd=5, col=plot_colors[6], cex=1.3)
legend("bottomright", c("not normalized", "normalized"), cex=1.3, col=c(plot_colors[1], plot_colors[6]), lwd=c(5,5))
dev.off()

# Two-sample Kolmogorov-Smirnov test
ks.test(vec, gm)


#write.table(exprs(qFiltNAs), file=outputIMP, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t")
png(outputIMP,      # create PNG for the boxplot of normalized (deltaCt) values
    width = 10*300, # 10 x 300 pixels
    height = 10*300,
    res = 300,      # 300 pixels per inch
    pointsize = 8)  # smaller font size
plotCtBoxes(normalizedDataset, stratify=NULL, xlab = "Samples", ylab="DeltaCt", names=as.character(seq(1, ncol(normalizedDataset), 1)))
dev.off()

################################################## Filtering based on number of NAs ##################################################

# FILTERING on the basis of NAs
# qPCRset.R
setMethod("exprs", signature(object="qPCRset"), definition =
    function (object) {x <- assayDataElement(object, "exprs"); rownames(x) <- featureNames(object); colnames(x) <- sampleNames(phenoData(object)); x}
)
ncatn <- as.integer(n.samples(normalizedDataset))*as.integer(percentofnastoremove)/100

qFiltNAs <- filterCtData(normalizedDataset, remove.category=c("Undetermined","Unreliable"), n.category=as.integer(ncatn), remove.name=explode(filtnames, sep = ","))

cat("\n Data filtered correctly! \n")

# Impute only if any NAs remain after filtering
if (any(is.na(exprs(qFiltNAs)))){
    switch(imputeMethod,
        "knn" = {
            imp <- impute.knn(exprs(qFiltNAs), k = as.integer(kappa), maxp = as.integer(macsp), rng.seed=362436069)
            exprs(qFiltNAs) <- imp$data
        },
        "mestdagh" = {
            # Mestdagh: temporarily replace NAs with -1000
            for (i in 1:nrow(exprs(qFiltNAs))){
                for (j in 1:ncol(exprs(qFiltNAs))){
                    if (is.na(exprs(qFiltNAs)[i,j])) exprs(qFiltNAs)[i,j] <- -1000
                }
            }
            # ...then set them to the row maximum + 1
            temp <- exprs(qFiltNAs)
            for (i in 1:nrow(exprs(qFiltNAs))){
                for (j in 1:ncol(exprs(qFiltNAs))){
                    if (exprs(qFiltNAs)[i,j] < (-100)) exprs(qFiltNAs)[i,j] <- max(temp[i,])+1
                }
            }
        },
        "cubic" = {
            exprs(qFiltNAs) <- na.spline(exprs(qFiltNAs))
        },
        "mean" = {
            exprs(qFiltNAs) <- impute(exprs(qFiltNAs), mean)
        },
        "median" = {
            exprs(qFiltNAs) <- impute(exprs(qFiltNAs), median)
        },
        "nondetects" = {
            qFiltNAs <- qpcrImpute(qFiltNAs, outform=c("Single"), linkglm = c("logit"))
        },
        stop("Enter something that switches me!")
    )
    cat("\n Imputation completed! \n")
} else {
    cat("\n Nothing to impute! \n")
}

write.table(exprs(qFiltNAs), file=outputRemaining, quote=FALSE, row.names=TRUE, col.names=TRUE, sep = "\t")

if (DEAMethod=="ttest") {
    # Differential expression analysis (paired t test + BH). Returns fold change in linear scale.
    DEG <- ttestCtData(qFiltNAs, groups = files$Treatment, alternative = alternative, paired = ifelse(paired=="TRUE", TRUE, FALSE), replicates = ifelse(replicates=="TRUE", TRUE, FALSE), sort = ifelse(sort=="TRUE", TRUE, FALSE), stringent = ifelse(stringent=="TRUE", TRUE, FALSE), p.adjust = padjust)
    write.table(DEG, file=outputDEA, quote=FALSE, row.names=TRUE, col.names=TRUE, sep = "\t")
}
if (DEAMethod=="rp") {
    DEG <- RP(exprs(qFiltNAs), as.numeric(pData(qFiltNAs)$Treatment)-1, num.perm = 1000, logged = TRUE, gene.names = featureNames(qFiltNAs), huge=TRUE, plot = FALSE, rand = 123)
    write.table(DEG[1:5], file=outputDEA, quote=FALSE, row.names=TRUE, col.names=TRUE, sep = "\t")
}
cat("\n Differential expression analysis completed correctly! \n")
cat("\n Workflow ended correctly! \n")
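
For readers tracing the positional-argument protocol at the top of pipe-t.R, the sketch below spells out one concrete layout, the deltaCt + knn + ttest branch (args 1-28). It is illustrative only and not part of the committed file: every file name and value is an invented placeholder, and other method choices shift the indices exactly as the parsing block above describes.

# Hedged example (not part of pipe-t.R): the argument order the script expects
# when normalizationMethod = "deltaCt", imputeMethod = "knn", DEAMethod = "ttest".
# All values below are placeholders for illustration.
example_args <- c(
  "design.txt",          #  1 sample sheet with sampleName and Treatment columns
  "EDS",                 #  2 input format
  "384",                 #  3 number of features per sample
  "raw_ct.tsv",          #  4 output: raw Ct table
  "/path/to/ct_files",   #  5 directory containing the Ct files
  "14",                  #  6 Ct.min passed to setCategory()
  "35",                  #  7 Ct.max passed to setCategory()
  "TRUE",                #  8 flag argument passed to setCategory()
  "ct_boxplot.png",      #  9 output: boxplot of filtered Ct values
  "deltaCt",             # 10 normalization method
  "RNU48,RNU44",         # 11 comma-separated endogenous controls (deltaCt only)
  "normalized.tsv",      # 12 output: normalized values
  "ecdf.png",            # 13 output: ECDF comparison plot
  "75",                  # 14 percentage of samples with NA above which a feature is removed
  "filtered.tsv",        # 15 output: values after filtering and imputation
  "knn",                 # 16 imputation method
  "5",                   # 17 k for impute.knn()
  "1500",                # 18 maxp for impute.knn()
  "deltact_boxplot.png", # 19 output: boxplot of normalized values
  "ttest",               # 20 DEA method
  "two.sided",           # 21 alternative hypothesis
  "FALSE",               # 22 paired
  "FALSE",               # 23 replicates
  "TRUE",                # 24 sort
  "FALSE",               # 25 stringent
  "BH",                  # 26 p.adjust method
  "dea.tsv",             # 27 output: DEA table
  "feature_to_drop"      # 28 comma-separated feature names removed during filtering
)
# e.g.  Rscript pipe-t.R <the 28 values above, in this order>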