comparison pipe-t.R @ 0:185ba61836ab draft

planemo upload for repository https://github.com/igg-molecular-biology-lab/pipe-t.git commit 04049039da97e1c9a8048e732afca48f2741cadf
author davidecangelosi
date Thu, 02 May 2019 04:49:04 -0400
parents
children 6cd22b1fbf6d
comparison
equal deleted inserted replaced
-1:000000000000 0:185ba61836ab
1
2 cat("\n Started! \n")
3 #sessionInfo()
4
5
6 # Send R errors to stderr
7 options(show.error.messages = F, error = function(){cat(geterrmessage(), file = stderr()); q("no", 1, F)})
8
9 # Avoid crashing Galaxy with an UTF8 error on German LC settings
10 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
11 suppressPackageStartupMessages({
12 library(HTqPCR)
13 library(base)
14 library(Biobase)
15 library(utils)
16 library(stats)
17 library(graphics)
18 library(grDevices)
19 library(RColorBrewer)
20 library(limma)
21 library(RankProd)
22 library(methods)
23 library(impute)
24 library(BBmisc)
25 library(affy)
26 library(psych)
27 #library(gmp)
28 library(zoo)
29 library(nondetects)
30 library(Hmisc)
31 #library(missForest)
32 #library(mice)
33 })
34
35 cat("\n R libraries...loaded!\n")
36
37 args = commandArgs(trailingOnly=TRUE)
38 dpfiles<-basename(args[1])
39 path000<-dirname(args[1])
40 format<-args[2]
41 nfeatures<-args[3]
42 rawout<-args[4]
43 path <- args[5]
44 dcCtmin<-args[6]
45 dcCtmax<-args[7]
46 dcflag<-args[8]
47 x<-args[9]
48 normalizationMethod<-args[10]
49 if (normalizationMethod=="deltaCt") {
50 normalizers<-args[11]
51 outputNorm<-args[12]
52 outputECDF<-args[13]
53 percentofnastoremove<-args[14]
54 outputRemaining<-args[15]
55 imputeMethod<-args[16]
56 if (imputeMethod=="knn") {
57 kappa<- args[17]
58 macsp<-args[18]
59 outputIMP<-args[19]
60
61 DEAMethod<-args[20]
62 if (DEAMethod=="ttest") {
63 alternative<- args[21]
64 paired<-args[22]
65 replicates<- args[23]
66 sort<-args[24]
67 stringent<- args[25]
68 padjust<-args[26]
69 outputDEA<-args[27]
70 filtnames<-args[28]
71 } else {
72 outputDEA<-args[21]
73 filtnames<-args[22]
74 }
75 } else {
76 #mean, median, nondetects, cubic
77 outputIMP<-args[17]
78 DEAMethod<-args[18]
79 if (DEAMethod=="ttest") {
80 alternative<- args[19]
81 paired<-args[20]
82 replicates<- args[21]
83 sort<-args[22]
84 stringent<- args[23]
85 padjust<-args[24]
86 outputDEA<-args[25]
87 filtnames<-args[26]
88 } else {
89 outputDEA<-args[19]
90 filtnames<-args[20]
91 }
92 }
93 }else {
94 outputNorm<-args[11]
95 outputECDF<-args[12]
96 percentofnastoremove<-args[13]
97 outputRemaining<-args[14]
98 imputeMethod<-args[15]
99
100 if (imputeMethod=="knn") {
101 kappa<- args[16]
102 macsp<-args[17]
103 outputIMP<-args[18]
104
105 DEAMethod<-args[19]
106
107 if (DEAMethod=="ttest") {
108 alternative<- args[20]
109 paired<-args[21]
110 replicates<- args[22]
111 sort<-args[23]
112 stringent<- args[24]
113 padjust<-args[25]
114 outputDEA<-args[26]
115 filtnames<-args[27]
116 } else {
117 outputDEA<-args[20]
118 filtnames<-args[21]
119 }
120 } else {
121 #mean, median, nondetects, cubic
122 outputIMP<-args[16]
123 DEAMethod<-args[17]
124 if (DEAMethod=="ttest") {
125 alternative<- args[18]
126 paired<-args[19]
127 replicates<- args[20]
128 sort<-args[21]
129 stringent<- args[22]
130 padjust<-args[23]
131 outputDEA<-args[24]
132 filtnames<-args[25]
133
134 } else {
135 outputDEA<-args[18]
136 filtnames<-args[19]
137
138 }
139 }
140 }
141 cat("\n Initialization completed! \n")
142
143 readCtDataDav<-
144 function (files, path = NULL, n.features = 384, format = "plain",
145 column.info, flag, feature, type, position, Ct, header = FALSE,
146 SDS = FALSE, n.data = 1, samples, na.value = 40, sample.info,
147 ...)
148 {
149 if (missing(files))
150 stop("No input files specified")
151 if (length(files) != length(n.data) & length(n.data) != 1)
152 stop("n.data must either be a single integer, or same length as number of files")
153 if (length(n.data) == 1)
154 n.data <- rep(n.data, length(files))
155 nsamples <- sum(n.data)
156 ncum <- cumsum(n.data)
157 s.names <- NULL
158 nspots <- n.features
159 if (SDS) {
160 warning("Please use format='SDS'. The SDS' parameter is retained for backward compatibility only.")
161 format <- "SDS"
162 }
163 if (!missing(flag) | !missing(feature) | !missing(type) |
164 !missing(position) | !missing(Ct)) {
165 warning("Please use 'column.info' for providing a list of column numbers containing particular information. The use of 'flag', 'feature', 'type', 'position' and 'Ct' is deprecated and will be removed in future versions.")
166 }
167 if (missing(column.info)) {
168 column.info <- switch(format, EDS = list(flag="EXPFAIL", feature="Target.Name", position="Well.Position", Ct="CT"),
169 plain = list(flag = 4,
170 feature = 6, type = 7, position = 3, Ct = 8), SDS = list(flag = 4,
171 feature = 6, type = 7, position = 3, Ct = 8), LightCycler = list(feature = "Name",
172 position = "Pos", Ct = "Cp"), CFX = list(feature = "Content",
173 position = "Well", Ct = "Cq.Mean"), OpenArray = list(flag = "ThroughHole.Outlier",
174 feature = "Assay.Assay.ID", type = "Assay.Assay.Type",
175 position = "ThroughHole.Address", Ct = "ThroughHole.Ct"),
176 BioMark = list(flag = "Call", feature = "Name.1",
177 position = "ID", Ct = "Value"))
178 }
179 X <- matrix(0, nspots, nsamples)
180 X.flags <- as.data.frame(X)
181 X.cat <- data.frame(matrix("OK", ncol = nsamples, nrow = nspots),
182 stringsAsFactors = FALSE)
183 for (i in seq_along(files)) {
184 if (i == 1) {
185 cols <- 1:ncum[i]
186 }
187 else {
188 cols <- (ncum[i - 1] + 1):ncum[i]
189 }
190 readfile <- ifelse(is.null(path), files[i], file.path(path,
191 files[i]))
192 sample <- switch(format, EDS =.readCtEDS(readfile = readfile,
193 n.data = n.data, i = i, nspots = nspots, ...), plain = .readCtPlain(readfile = readfile,
194 header = header, n.features = n.features, n.data = n.data,
195 i = i, ...), SDS = .readCtSDS(readfile = readfile,
196 n.data = n.data, i = i, nspots = nspots, ...), LightCycler = .readCtLightCycler(readfile = readfile,
197 n.data = n.data, i = i, nspots = nspots, ...), CFX = .readCtCFX(readfile = readfile,
198 n.data = n.data, i = i, nspots = nspots, ...), OpenArray = .readCtOpenArray(readfile = readfile,
199 n.data = n.data, i = i, nspots = nspots, ...), BioMark = .readCtBioMark(readfile = readfile,
200 n.data = n.data, i = i, nspots = nspots, ...))
201
202 data <- matrix(sample[, column.info[["Ct"]]], ncol = n.data[i])
203 undeter <- apply(data, 2, function(x) x %in% c("Undetermined",
204 "No Ct"))
205 X.cat[, cols][undeter] <- "Undetermined"
206 nas <- c("Undetermined", "No Ct", "999", "N/A")
207 if (is.null(na.value)) {
208 data[data %in% nas | data == 0] <- NA
209 }
210 else {
211 data[data %in% nas | is.na(data) | data == 0] <- na.value
212 }
213 X[, cols] <- apply(data, 2, function(x) as.numeric(as.character(x)))
214 if ("flag" %in% names(column.info)) {
215 flags <- matrix(sample[, column.info[["flag"]]],
216 ncol = n.data[i])
217 flags[flags == "-"] <- "Failed"
218 flags[flags == "+"] <- "Passed"
219 X.flags[, cols] <- flags
220 }
221 else {
222 X.flags[, cols] <- "Passed"
223 }
224 if (format == "OpenArray") {
225 s.names <- c(s.names, unique(sample$SampleInfo.SampleID))
226 }
227 else if (format %in% c("EDS","plain", "SDS")) {
228 s.names <- c(s.names, unique(sample[, 2]))
229 }
230 else {
231 s.names <- s.names
232 }
233 if (i == 1) {
234 featPos <- paste("feature", 1:nspots, sep = "")
235 if ("position" %in% names(column.info))
236 featPos <- as.character(sample[1:nspots, column.info[["position"]]])
237 featType <- factor(rep("Target", nspots))
238 if ("type" %in% names(column.info))
239 featType <- sample[1:nspots, column.info[["type"]]]
240 featName <- paste("feature", 1:nspots, sep = "")
241 if ("feature" %in% names(column.info))
242 featName <- as.character(sample[1:nspots, column.info[["feature"]]])
243 df <- data.frame(featureNames = featName, featureType = as.factor(featType),
244 featurePos = featPos)
245 metaData <- data.frame(labelDescription = c("Name of the qPCR feature (gene)",
246 "Type pf feature", "Position on assay"))
247 featData <- AnnotatedDataFrame(data = df, varMetadata = metaData)
248 }
249 }
250 if (!missing(samples)) {
251 if (length(samples) < nsamples) {
252 warning("Not enough sample names provided; using Sample1, Sample2, ... instead\n")
253 samples <- paste("Sample", 1:nsamples, sep = "")
254 }
255 else if (length(samples) == nsamples) {
256 samples <- samples
257 }
258 }
259 else if (missing(samples)) {
260 if (length(files) == nsamples) {
261 samples <- gsub("(.+)\\..+", "\\1", files)
262 }
263 else if (length(s.names) == nsamples) {
264 samples <- s.names
265 }
266 else {
267 samples <- paste("Sample", 1:nsamples, sep = "")
268 }
269 }
270 samples <- make.unique(samples)
271 if (any(is.na(X)))
272 warning("One or more samples contain NAs. Consider replacing these with e.g. Ct=40 now.")
273 if (missing(sample.info)) {
274 pdata <- data.frame(sample = 1:length(samples), row.names = samples)
275 sample.info <- new("AnnotatedDataFrame", data = pdata,
276 varMetadata = data.frame(labelDescription = "Sample numbering",
277 row.names = "Sample names"))
278 }
279 X.hist <- data.frame(history = capture.output(match.call(readCtData)),
280 stringsAsFactors = FALSE)
281 out <- new("qPCRset", exprs = X, phenoData = sample.info,
282 featureData = featData, featureCategory = X.cat, flag = X.flags,
283 CtHistory = X.hist)
284 out
285 }
286 .readCtEDS <-
287 function(readfile=readfile, n.data=n.data, i=i, nspots=nspots, ...)
288 {
289 # Scan through beginning of file, max 100 lines
290 file.header <- readLines(con=readfile, n=100)
291 n.header <- grep("^Well", file.header)
292 if (length(n.header)==0)
293 n.header <- 0
294 # Read data, skip the required lines
295 out <- read.delim(file=readfile, header=TRUE, colClasses="character", nrows=nspots*n.data[i], skip=n.header-1, strip.white=TRUE, ...)
296 out
297 } # .readCtEDS
298
299 head(read.delim(file.path(path000, dpfiles), sep="\t"))
300 files <- read.delim(file.path(path000, dpfiles), sep="\t")
301 switch(format,
302 "EDS"={
303 columns<- list(flag="EXPFAIL", feature="Target.Name", position="Well.Position", Ct="CT")
304 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
305 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
306 rownames(phenoData)=as.vector(files$sampleName)
307 raw<- readCtDataDav(files = as.vector(files$sampleName), header=TRUE, format="EDS", column.info=columns, path = path,sample.info=phenoData,n.features = as.numeric(nfeatures))
308 },
309 "plain"={
310 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
311 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
312 rownames(phenoData)=as.vector(files$sampleName)
313 raw<- readCtDataDav(files = as.vector(files$sampleName), header=FALSE, format="plain", path = path, sample.info=phenoData,n.features = as.numeric(nfeatures))
314 },
315 "SDS"={
316 columns<- list(feature=3, Ct=6, flag=11)
317 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
318 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
319 rownames(phenoData)=as.vector(files$sampleName)
320 raw<- readCtDataDav(files = files$sampleName, format="SDS",column.info=columns, path = path, sample.info=phenoData, n.features=as.numeric(nfeatures))
321 },
322 "LightCycler"={
323 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
324 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
325 rownames(phenoData)=as.vector(files$sampleName)
326 raw <- readCtDataDav(files = files$sampleName, path = path, format = "LightCycler", sample.info=phenoData,n.features = as.numeric(nfeatures))
327 },
328 "CFX"={
329 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
330 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
331 rownames(phenoData)=as.vector(files$sampleName)
332 raw <- readCtDataDav(files = files$sampleName, path = path, format = "CFX", sample.info=phenoData,n.features = as.numeric(nfeatures))
333 },
334 "OpenArray"={
335 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
336 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
337 rownames(phenoData)=as.vector(files$sampleName)
338 raw <- readCtDataDav(files = files$sampleName, path = path, format = "OpenArray", sample.info=phenoData,n.features = as.numeric(nfeatures))
339 },
340 "BioMark"={
341 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment"))
342 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata)
343 rownames(phenoData)=as.vector(files$sampleName)
344 raw <- readCtDataDav(files = files$sampleName, path = path, format = "BioMark", sample.info=phenoData, n.features = as.numeric(nfeatures))
345 },
346 stop("Enter something that switches me!")
347 )
348 cat("\n Files read correctly! ")
349
350 write.table(exprs(raw), file=rawout, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t")
351 ####################################################################################################################
352 #Set a new categories for the values meeting two criterions
353 switch(format,
354 "EDS"={
355 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL)
356 },
357 "plain"={
358 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Flagged", quantile=NULL)
359 },
360 "SDS"={
361 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="TRUE", quantile=NULL)
362 },
363 "LightCycler"={
364 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL)
365 },
366 "CFX"={
367 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL)
368 },
369 "OpenArray"={
370 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="TRUE ", quantile=NULL)
371 },
372 "BioMark"={
373 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Fail", quantile=NULL)
374 },
375 stop("Enter something that switches me!")
376 )
377 #unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL)
378
379 ####################################################################################################################
380 #Filter out the values of the new category
381 xFilter <- filterCategory(unreliable)
382
383 cat("\n Categorization completed! ")
384
385 png(x, # create PNG for the heat map
386 width = 10*300, # 5 x 300 pixels
387 height = 10*300,
388 res = 300, # 300 pixels per inch
389 pointsize = 8)
390 plotCtBoxes(xFilter, stratify=NULL, xlab = "Samples", ylab="Ct", names=as.character(seq(1, ncol(xFilter), 1))) # smaller font size
391 dev.off()
392
393 #write.table(exprs(xFilter), file=x, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t")
394
395 ####################################################################################################################
396 #NORMALIZATION
397 #method version 3.5.1 + Global mean
398 normalizeCtDataDav <-
399 function(q,
400 norm = "deltaCt",
401 deltaCt.genes = NULL,
402 scale.rank.samples,
403 rank.type = "pseudo.median",
404 Ct.max = dcCtmax,
405 geo.mean.ref,
406 verbose = TRUE)
407 {
408 # Extract the data
409 data <- exprs(q)
410 data.norm <- data
411 # Get the normalisation method
412 method <- match.arg(norm, c("quantile", "scale.rankinvariant", "norm.rankinvariant", "deltaCt", "geometric.mean", "globalmean"))
413 # Some general stuff that will be used by both rank.invariant methods
414 if (method %in% c("scale.rankinvariant", "norm.rankinvariant")) {
415 # Index to use for too high Ct values
416 #Ct.index <- data>Ct.max
417 data.Ctmax <- data
418 Ct.index <- is.na(data)
419 #data.Ctmax[Ct.index] <- NA
420 data<-na.spline(data)
421 # Define what to rank against
422 if (rank.type=="pseudo.median") {
423 ref.data <- apply(data.Ctmax, 1, median, na.rm=TRUE)
424 } else if (rank.type=="pseudo.mean") {
425 ref.data <- apply(data.Ctmax, 1, mean, na.rm=TRUE)
426 }
427 # Mark + replace NA values with something temporary
428 na.index <- is.na(ref.data)
429 ref.data[na.index] <- 30
430 # Run the rank.invariant function
431 data.rankinvar <- apply(data, 2, normalize.invariantset, ref=ref.data)
432 }
433 # The actual normalisation
434 switch(method,
435 quantile = {
436 # Use an internal limma function
437 data.norm <- normalizeQuantiles(data)
438 },
439 scale.rankinvariant = {
440 # Get all the rank invariant genes
441 ri.genes <- sapply(data.rankinvar, "[[", "i.set")
442 # Remove those with too high Ct values
443 ri.genes[Ct.index] <- FALSE
444 # Remove those that were all NA for potentially other reasons
445 ri.genes[na.index,] <- FALSE
446 # Select those to use here
447 ri.count <- rowSums(ri.genes)
448 if (missing(scale.rank.samples))
449 scale.rank.samples <- ncol(data)-1
450 ri.index <- ri.count >= scale.rank.samples
451 if (sum(ri.index)==0)
452 stop(paste("No rank invariant genes were found across", scale.rank.samples, "samples"))
453 # Extract the corresponding Ct values; average
454 ri.mean <- colMeans(data[ri.index,,drop=FALSE])
455 ri.scale <- ri.mean/ri.mean[1]
456 # Correct the data
457 data.norm <- t(t(data)*ri.scale)
458 # Print info
459 if (verbose) {
460 cat(c("Scaling Ct values\n\tUsing rank invariant genes:", paste(featureNames(q)[ri.index], collapse=" "), "\n"))
461 cat(c("\tScaling factors:", format(ri.scale, digits=3), "\n"))
462 }
463 },
464 norm.rankinvariant = {
465 # Print info
466 if (verbose)
467 cat("Normalizing Ct values\n\tUsing rank invariant genes:\n")
468 # Correct the data based on the calculations above
469 for (i in 1:ncol(data)) {
470 # Check if there are any rank invariant genes
471 ri.sub <- data.rankinvar[[i]]
472 ri.genes <- ri.sub[["i.set"]]
473 # Remove those that don't pass the Ct.max criteria
474 ri.genes[Ct.index[,i]] <- FALSE
475 # Remove those that are NA for other reasons
476 ri.genes[na.index] <- FALSE
477 if (sum(ri.genes)==0) {
478 warning(paste("\tNo rank invariant genes were found for sample ", sampleNames(q)[i], "; sample not normalized\n", sep=""))
479 next
480 }
481 # If verbose, print some info
482 if (verbose)
483 cat(paste("\t", sampleNames(q)[i], ": ", sum(ri.genes), " rank invariant genes\n", sep=""))
484 # The actual correction
485 data.norm[,i] <- as.numeric(approx(ri.sub$n.curve$y, ri.sub$n.curve$x, xout=data[,i], rule=2)$y)
486 }
487 },
488 deltaCt = {
489 # Which are the reference genes (endogenous controls)
490 if (is.null(deltaCt.genes))
491 deltaCt.genes <- unique(featureNames(q)[featureType(q)=="Endogenous Control"])
492 c.index <- featureNames(q) %in% deltaCt.genes
493 if (verbose) {
494 cat(c("Calculating deltaCt values\n\tUsing control gene(s):", paste(deltaCt.genes, collapse=" "), "\n"))
495 }
496 # Run though all cards; perform internal normalisation
497 for (c in 1:ncol(data)) {
498 # Calculate the control genes
499 refCt <- mean(data[c.index,c], na.rm=TRUE)
500 refsd <- sd(data[c.index,c], na.rm=TRUE)
501 # Difference for target and controls
502 data.norm[,c] <- data[,c]-refCt
503 # Print results
504 if (verbose)
505 cat(paste("\tCard ", c, ":\tMean=", format(refCt, dig=4), "\tStdev=", format(refsd, dig=3), "\n", sep=""))
506 }
507 },
508 geometric.mean = {
509 # For each column, calculate the geometric mean of Ct values<Ct.max
510 #geo.mean <- apply(data, 2, function(x) {
511 # xx <- log2(subset(x, x<Ct.max))
512 # 2^mean(xx)})
513 geo.mean <- apply(data, 2, function(x) {
514 xx <- subset(x, x<=Ct.max)
515 geometric.mean(xx)})
516 # Which sample to scale to
517 #if (missing(geo.mean.ref))
518 # geo.mean.ref <- 1
519 # Calculate the scaling factor
520 #geo.scale <- geo.mean/geo.mean[geo.mean.ref]
521 # Adjust the data accordingly
522 data.norm <- t(t(data) - geo.mean)
523 if (verbose) {
524 cat(c("Scaling Ct values\n\tUsing geometric mean within each sample\n"))
525 #cat(c("\tScaling factors:", format(geo.scale, digits=3), "\n"))
526 }
527 } # switch
528 ,globalmean = {
529 glo <- apply(data, 2, function(x) {
530 xx <- subset(x, x <= Ct.max)
531 mean(xx)
532 })
533 data.norm <- t(t(data) - glo)
534 }
535 )
536 # Replace with the normalised Ct exprs
537 exprs(q) <- data.norm
538 # Add to the history of the object
539 if (nrow(getCtHistory(q))==0)
540 setCtHistory(q) <- data.frame(history="Manually created qPCRset object.", stringsAsFactors=FALSE)
541 setCtHistory(q) <- rbind(getCtHistory(q), capture.output(match.call(normalizeCtData)))
542 # Return the normalised object
543 q
544 }
545
546
547 switch(normalizationMethod,
548 "deltaCt"={
549 normalizedDataset <- normalizeCtDataDav(xFilter, norm="deltaCt", deltaCt.genes =explode(normalizers, sep = ","))
550 },
551 "quantile"={
552 normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
553 },
554 "scale.rankinvariant"={
555 normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
556 },
557 "norm.rankinvariant"={
558 normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
559 },
560 "geometric.mean"={
561 normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
562 },
563 "globalmean"={
564 normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
565 },
566 stop("Enter something that switches me!")
567 )
568
569 #if (normalizationMethod=="deltaCt") {
570 #normalize CT data
571
572 #normalizedDataset <- normalizeCtDataDav(xFilter, norm="deltaCt", deltaCt.genes =explode(normalizers, sep = ","))
573 #} else {
574 #normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod)
575
576 #}
577 cat("\n Data normalized correctly! \n")
578 write.table(exprs(normalizedDataset), file=outputNorm, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t")
579
580
581 #normalizedDataset
582 ####################################################################################################################
583 #Check noise reduction by empirical cumulative distribution
584
585 #X = rnorm(100) # X is a sample of 100 normally distributed random variables
586 # P = ecdf(X) # P is a function giving the empirical CDF of X
587 #Y = rnorm(1000) # X is a sample of 100 normally distributed random variables
588 # PY = ecdf(Y)
589 #plotâ„—
590
591 #lines(PY)
592 png(outputECDF, # create PNG for the heat map
593 width = 10*300, # 5 x 300 pixels
594 height = 10*300,
595 res = 300, # 300 pixels per inch
596 pointsize = 8) # smaller font size
597 vec=c()
598 for (i in 1:nrow(exprs(xFilter))){
599 CVX<-(sd(2^-exprs(xFilter)[i,], na.rm = TRUE)/mean(2^-exprs(xFilter)[i,], na.rm = TRUE))*100
600 vec=c(vec, c(CVX))
601 }
602 vec<-na.omit(vec)
603 P = ecdf(vec)
604 gm=c()
605 for (i in 1:nrow(exprs(normalizedDataset))){
606 CVGM<-(sd(2^-exprs(normalizedDataset)[i,], na.rm = TRUE)/mean(2^-exprs(normalizedDataset)[i,], na.rm = TRUE))*100
607 gm=c(gm, c(CVGM))
608 }
609 gm<-na.omit(gm)
610
611 PY = ecdf(gm)
612
613 plot_colors <- c(rgb(r=0.0,g=0.0,b=0.9), "red", "forestgreen",rgb(r=0.0,g=0.0,b=0.0),rgb(r=0.5,g=0.0,b=0.3),rgb(r=0.0,g=0.4,b=0.4))
614
615 plot(P,col=plot_colors[1],xlim=c(0.0,600), ylim=c(0.0,1),xaxp = c(0.0, 600, 6),yaxp = c(0.0, 1, 10), cex=1.3, lwd=5, main=NULL,xlab="CV(%)",ylab="Empirical Cumulative Distribution")
616 lines(PY, lwd=5, col=plot_colors[6],cex=1.3)
617 legend("bottomright", c("not normalized", "normalized"), cex=1.3, col=c(plot_colors[1],plot_colors[6]), lwd=c(5,5));
618 dev.off()
619
620 #Two-sample Kolmogorov-Smirnov
621 ks.test(vec,gm)
622
623
624 #write.table(exprs(qFiltNAs), file=outputIMP, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t")
625 png(outputIMP, # create PNG for the heat map
626 width = 10*300, # 5 x 300 pixels
627 height = 10*300,
628 res = 300, # 300 pixels per inch
629 pointsize = 8)
630 plotCtBoxes(normalizedDataset, stratify=NULL, xlab = "Samples", ylab="DeltaCt", names=as.character(seq(1, ncol(normalizedDataset), 1))) # smaller font size
631 dev.off()
632
633 ################################################## Filtering based on number of NAs##################################################
634
635 #FILTERING on the basis of NAs
636 #qPCRset.R
637 setMethod("exprs", signature(object="qPCRset"), definition =
638 function (object) {x <- assayDataElement(object, "exprs"); rownames(x) <- featureNames(object); colnames(x) <- sampleNames(phenoData(object));x}
639 )
640 ncatn <- as.integer(n.samples(normalizedDataset))*as.integer(percentofnastoremove)/100
641
642 qFiltNAs <- filterCtData(normalizedDataset, remove.category=c("Undetermined","Unreliable"), n.category=as.integer(ncatn),remove.name=explode(filtnames, sep = ","))
643
644 cat("\n Data filtered correctly! \n")
645
646 if (is.na(exprs(qFiltNAs))){
647 switch(imputeMethod,
648 "knn"={
649 imp<-impute.knn(exprs(qFiltNAs) ,k = as.integer(kappa), maxp = as.integer(macsp), rng.seed=362436069)
650 exprs(qFiltNAs)=imp$data
651 },
652 "mestdagh"={
653 #Mesdagh
654 #sostituisce a NA -1000
655 for (i in 1:nrow(exprs(qFiltNAs))){
656 for(j in 1:ncol(exprs(qFiltNAs))){
657 if(is.na(exprs(qFiltNAs)[i,j])>0)exprs(qFiltNAs)[i,j]<- -1000
658 }
659 }
660 temp<-exprs(qFiltNAs)
661 for (i in 1:nrow(exprs(qFiltNAs))){
662 for(j in 1:ncol(exprs(qFiltNAs))){
663 if(exprs(qFiltNAs)[i,j]<(-100))exprs(qFiltNAs)[i,j]<- max(temp[i,])+1
664 }
665 }
666 },
667 "cubic"={
668 exprs(qFiltNAs) <- na.spline(exprs(qFiltNAs))
669 },
670 "mean"={
671 exprs(qFiltNAs)<-impute(exprs(qFiltNAs),mean)
672 },
673 "median"={
674 exprs(qFiltNAs)<-impute(exprs(qFiltNAs),median)
675 },
676 "nondetects"={
677 qFiltNAs <- qpcrImpute(qFiltNAs, outform=c("Single"),linkglm = c("logit"))
678 },
679 stop("Enter something that switches me!")
680 )
681
682 cat("\n Imputation completed! \n")
683 }else{
684 cat("\n Nothing to impute! \n")
685 }
686
687 write.table(exprs(qFiltNAs), file=outputRemaining, quote=FALSE, row.names=TRUE, col.names=TRUE, sep = "\t")
688
689 if (DEAMethod=="ttest") {
690 #Differential expression analysis (paired t test+BH). Returns Fold change in linear scale.
691 DEG<-ttestCtData(qFiltNAs, groups = files$Treatment, alternative = alternative, paired = ifelse(paired=="TRUE", TRUE, FALSE), replicates =ifelse(replicates=="TRUE", TRUE, FALSE), sort=ifelse(sort=="TRUE", TRUE, FALSE), stringent=ifelse(stringent=="TRUE", TRUE, FALSE), p.adjust=padjust)
692 write.table(DEG, file=outputDEA, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t")
693 }
694 if (DEAMethod=="rp") {
695 DEG<-RP(exprs(qFiltNAs), as.numeric(pData(qFiltNAs)$Treatment)-1, num.perm = 1000,logged = TRUE, gene.names = featureNames(qFiltNAs), huge=TRUE, plot = FALSE, rand = 123)
696 write.table(DEG[1:5], file=outputDEA, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t")
697 }
698 cat("\n Differential expression analysis completed correctly! \n")
699 cat("\n Workflow ended correctly! \n")