Mercurial > repos > davidecangelosi > pipe_t
comparison pipe-t.R @ 0:185ba61836ab draft
planemo upload for repository https://github.com/igg-molecular-biology-lab/pipe-t.git commit 04049039da97e1c9a8048e732afca48f2741cadf
author | davidecangelosi |
---|---|
date | Thu, 02 May 2019 04:49:04 -0400 |
parents | |
children | 6cd22b1fbf6d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:185ba61836ab |
---|---|
1 | |
2 cat("\n Started! \n") | |
3 #sessionInfo() | |
4 | |
5 | |
6 # Send R errors to stderr | |
7 options(show.error.messages = F, error = function(){cat(geterrmessage(), file = stderr()); q("no", 1, F)}) | |
8 | |
9 # Avoid crashing Galaxy with an UTF8 error on German LC settings | |
10 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
11 suppressPackageStartupMessages({ | |
12 library(HTqPCR) | |
13 library(base) | |
14 library(Biobase) | |
15 library(utils) | |
16 library(stats) | |
17 library(graphics) | |
18 library(grDevices) | |
19 library(RColorBrewer) | |
20 library(limma) | |
21 library(RankProd) | |
22 library(methods) | |
23 library(impute) | |
24 library(BBmisc) | |
25 library(affy) | |
26 library(psych) | |
27 #library(gmp) | |
28 library(zoo) | |
29 library(nondetects) | |
30 library(Hmisc) | |
31 #library(missForest) | |
32 #library(mice) | |
33 }) | |
34 | |
35 cat("\n R libraries...loaded!\n") | |
36 | |
37 args = commandArgs(trailingOnly=TRUE) | |
38 dpfiles<-basename(args[1]) | |
39 path000<-dirname(args[1]) | |
40 format<-args[2] | |
41 nfeatures<-args[3] | |
42 rawout<-args[4] | |
43 path <- args[5] | |
44 dcCtmin<-args[6] | |
45 dcCtmax<-args[7] | |
46 dcflag<-args[8] | |
47 x<-args[9] | |
48 normalizationMethod<-args[10] | |
49 if (normalizationMethod=="deltaCt") { | |
50 normalizers<-args[11] | |
51 outputNorm<-args[12] | |
52 outputECDF<-args[13] | |
53 percentofnastoremove<-args[14] | |
54 outputRemaining<-args[15] | |
55 imputeMethod<-args[16] | |
56 if (imputeMethod=="knn") { | |
57 kappa<- args[17] | |
58 macsp<-args[18] | |
59 outputIMP<-args[19] | |
60 | |
61 DEAMethod<-args[20] | |
62 if (DEAMethod=="ttest") { | |
63 alternative<- args[21] | |
64 paired<-args[22] | |
65 replicates<- args[23] | |
66 sort<-args[24] | |
67 stringent<- args[25] | |
68 padjust<-args[26] | |
69 outputDEA<-args[27] | |
70 filtnames<-args[28] | |
71 } else { | |
72 outputDEA<-args[21] | |
73 filtnames<-args[22] | |
74 } | |
75 } else { | |
76 #mean, median, nondetects, cubic | |
77 outputIMP<-args[17] | |
78 DEAMethod<-args[18] | |
79 if (DEAMethod=="ttest") { | |
80 alternative<- args[19] | |
81 paired<-args[20] | |
82 replicates<- args[21] | |
83 sort<-args[22] | |
84 stringent<- args[23] | |
85 padjust<-args[24] | |
86 outputDEA<-args[25] | |
87 filtnames<-args[26] | |
88 } else { | |
89 outputDEA<-args[19] | |
90 filtnames<-args[20] | |
91 } | |
92 } | |
93 }else { | |
94 outputNorm<-args[11] | |
95 outputECDF<-args[12] | |
96 percentofnastoremove<-args[13] | |
97 outputRemaining<-args[14] | |
98 imputeMethod<-args[15] | |
99 | |
100 if (imputeMethod=="knn") { | |
101 kappa<- args[16] | |
102 macsp<-args[17] | |
103 outputIMP<-args[18] | |
104 | |
105 DEAMethod<-args[19] | |
106 | |
107 if (DEAMethod=="ttest") { | |
108 alternative<- args[20] | |
109 paired<-args[21] | |
110 replicates<- args[22] | |
111 sort<-args[23] | |
112 stringent<- args[24] | |
113 padjust<-args[25] | |
114 outputDEA<-args[26] | |
115 filtnames<-args[27] | |
116 } else { | |
117 outputDEA<-args[20] | |
118 filtnames<-args[21] | |
119 } | |
120 } else { | |
121 #mean, median, nondetects, cubic | |
122 outputIMP<-args[16] | |
123 DEAMethod<-args[17] | |
124 if (DEAMethod=="ttest") { | |
125 alternative<- args[18] | |
126 paired<-args[19] | |
127 replicates<- args[20] | |
128 sort<-args[21] | |
129 stringent<- args[22] | |
130 padjust<-args[23] | |
131 outputDEA<-args[24] | |
132 filtnames<-args[25] | |
133 | |
134 } else { | |
135 outputDEA<-args[18] | |
136 filtnames<-args[19] | |
137 | |
138 } | |
139 } | |
140 } | |
141 cat("\n Initialization completed! \n") | |
142 | |
143 readCtDataDav<- | |
144 function (files, path = NULL, n.features = 384, format = "plain", | |
145 column.info, flag, feature, type, position, Ct, header = FALSE, | |
146 SDS = FALSE, n.data = 1, samples, na.value = 40, sample.info, | |
147 ...) | |
148 { | |
149 if (missing(files)) | |
150 stop("No input files specified") | |
151 if (length(files) != length(n.data) & length(n.data) != 1) | |
152 stop("n.data must either be a single integer, or same length as number of files") | |
153 if (length(n.data) == 1) | |
154 n.data <- rep(n.data, length(files)) | |
155 nsamples <- sum(n.data) | |
156 ncum <- cumsum(n.data) | |
157 s.names <- NULL | |
158 nspots <- n.features | |
159 if (SDS) { | |
160 warning("Please use format='SDS'. The SDS' parameter is retained for backward compatibility only.") | |
161 format <- "SDS" | |
162 } | |
163 if (!missing(flag) | !missing(feature) | !missing(type) | | |
164 !missing(position) | !missing(Ct)) { | |
165 warning("Please use 'column.info' for providing a list of column numbers containing particular information. The use of 'flag', 'feature', 'type', 'position' and 'Ct' is deprecated and will be removed in future versions.") | |
166 } | |
167 if (missing(column.info)) { | |
168 column.info <- switch(format, EDS = list(flag="EXPFAIL", feature="Target.Name", position="Well.Position", Ct="CT"), | |
169 plain = list(flag = 4, | |
170 feature = 6, type = 7, position = 3, Ct = 8), SDS = list(flag = 4, | |
171 feature = 6, type = 7, position = 3, Ct = 8), LightCycler = list(feature = "Name", | |
172 position = "Pos", Ct = "Cp"), CFX = list(feature = "Content", | |
173 position = "Well", Ct = "Cq.Mean"), OpenArray = list(flag = "ThroughHole.Outlier", | |
174 feature = "Assay.Assay.ID", type = "Assay.Assay.Type", | |
175 position = "ThroughHole.Address", Ct = "ThroughHole.Ct"), | |
176 BioMark = list(flag = "Call", feature = "Name.1", | |
177 position = "ID", Ct = "Value")) | |
178 } | |
179 X <- matrix(0, nspots, nsamples) | |
180 X.flags <- as.data.frame(X) | |
181 X.cat <- data.frame(matrix("OK", ncol = nsamples, nrow = nspots), | |
182 stringsAsFactors = FALSE) | |
183 for (i in seq_along(files)) { | |
184 if (i == 1) { | |
185 cols <- 1:ncum[i] | |
186 } | |
187 else { | |
188 cols <- (ncum[i - 1] + 1):ncum[i] | |
189 } | |
190 readfile <- ifelse(is.null(path), files[i], file.path(path, | |
191 files[i])) | |
192 sample <- switch(format, EDS =.readCtEDS(readfile = readfile, | |
193 n.data = n.data, i = i, nspots = nspots, ...), plain = .readCtPlain(readfile = readfile, | |
194 header = header, n.features = n.features, n.data = n.data, | |
195 i = i, ...), SDS = .readCtSDS(readfile = readfile, | |
196 n.data = n.data, i = i, nspots = nspots, ...), LightCycler = .readCtLightCycler(readfile = readfile, | |
197 n.data = n.data, i = i, nspots = nspots, ...), CFX = .readCtCFX(readfile = readfile, | |
198 n.data = n.data, i = i, nspots = nspots, ...), OpenArray = .readCtOpenArray(readfile = readfile, | |
199 n.data = n.data, i = i, nspots = nspots, ...), BioMark = .readCtBioMark(readfile = readfile, | |
200 n.data = n.data, i = i, nspots = nspots, ...)) | |
201 | |
202 data <- matrix(sample[, column.info[["Ct"]]], ncol = n.data[i]) | |
203 undeter <- apply(data, 2, function(x) x %in% c("Undetermined", | |
204 "No Ct")) | |
205 X.cat[, cols][undeter] <- "Undetermined" | |
206 nas <- c("Undetermined", "No Ct", "999", "N/A") | |
207 if (is.null(na.value)) { | |
208 data[data %in% nas | data == 0] <- NA | |
209 } | |
210 else { | |
211 data[data %in% nas | is.na(data) | data == 0] <- na.value | |
212 } | |
213 X[, cols] <- apply(data, 2, function(x) as.numeric(as.character(x))) | |
214 if ("flag" %in% names(column.info)) { | |
215 flags <- matrix(sample[, column.info[["flag"]]], | |
216 ncol = n.data[i]) | |
217 flags[flags == "-"] <- "Failed" | |
218 flags[flags == "+"] <- "Passed" | |
219 X.flags[, cols] <- flags | |
220 } | |
221 else { | |
222 X.flags[, cols] <- "Passed" | |
223 } | |
224 if (format == "OpenArray") { | |
225 s.names <- c(s.names, unique(sample$SampleInfo.SampleID)) | |
226 } | |
227 else if (format %in% c("EDS","plain", "SDS")) { | |
228 s.names <- c(s.names, unique(sample[, 2])) | |
229 } | |
230 else { | |
231 s.names <- s.names | |
232 } | |
233 if (i == 1) { | |
234 featPos <- paste("feature", 1:nspots, sep = "") | |
235 if ("position" %in% names(column.info)) | |
236 featPos <- as.character(sample[1:nspots, column.info[["position"]]]) | |
237 featType <- factor(rep("Target", nspots)) | |
238 if ("type" %in% names(column.info)) | |
239 featType <- sample[1:nspots, column.info[["type"]]] | |
240 featName <- paste("feature", 1:nspots, sep = "") | |
241 if ("feature" %in% names(column.info)) | |
242 featName <- as.character(sample[1:nspots, column.info[["feature"]]]) | |
243 df <- data.frame(featureNames = featName, featureType = as.factor(featType), | |
244 featurePos = featPos) | |
245 metaData <- data.frame(labelDescription = c("Name of the qPCR feature (gene)", | |
246 "Type pf feature", "Position on assay")) | |
247 featData <- AnnotatedDataFrame(data = df, varMetadata = metaData) | |
248 } | |
249 } | |
250 if (!missing(samples)) { | |
251 if (length(samples) < nsamples) { | |
252 warning("Not enough sample names provided; using Sample1, Sample2, ... instead\n") | |
253 samples <- paste("Sample", 1:nsamples, sep = "") | |
254 } | |
255 else if (length(samples) == nsamples) { | |
256 samples <- samples | |
257 } | |
258 } | |
259 else if (missing(samples)) { | |
260 if (length(files) == nsamples) { | |
261 samples <- gsub("(.+)\\..+", "\\1", files) | |
262 } | |
263 else if (length(s.names) == nsamples) { | |
264 samples <- s.names | |
265 } | |
266 else { | |
267 samples <- paste("Sample", 1:nsamples, sep = "") | |
268 } | |
269 } | |
270 samples <- make.unique(samples) | |
271 if (any(is.na(X))) | |
272 warning("One or more samples contain NAs. Consider replacing these with e.g. Ct=40 now.") | |
273 if (missing(sample.info)) { | |
274 pdata <- data.frame(sample = 1:length(samples), row.names = samples) | |
275 sample.info <- new("AnnotatedDataFrame", data = pdata, | |
276 varMetadata = data.frame(labelDescription = "Sample numbering", | |
277 row.names = "Sample names")) | |
278 } | |
279 X.hist <- data.frame(history = capture.output(match.call(readCtData)), | |
280 stringsAsFactors = FALSE) | |
281 out <- new("qPCRset", exprs = X, phenoData = sample.info, | |
282 featureData = featData, featureCategory = X.cat, flag = X.flags, | |
283 CtHistory = X.hist) | |
284 out | |
285 } | |
286 .readCtEDS <- | |
287 function(readfile=readfile, n.data=n.data, i=i, nspots=nspots, ...) | |
288 { | |
289 # Scan through beginning of file, max 100 lines | |
290 file.header <- readLines(con=readfile, n=100) | |
291 n.header <- grep("^Well", file.header) | |
292 if (length(n.header)==0) | |
293 n.header <- 0 | |
294 # Read data, skip the required lines | |
295 out <- read.delim(file=readfile, header=TRUE, colClasses="character", nrows=nspots*n.data[i], skip=n.header-1, strip.white=TRUE, ...) | |
296 out | |
297 } # .readCtEDS | |
298 | |
299 head(read.delim(file.path(path000, dpfiles), sep="\t")) | |
300 files <- read.delim(file.path(path000, dpfiles), sep="\t") | |
301 switch(format, | |
302 "EDS"={ | |
303 columns<- list(flag="EXPFAIL", feature="Target.Name", position="Well.Position", Ct="CT") | |
304 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment")) | |
305 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata) | |
306 rownames(phenoData)=as.vector(files$sampleName) | |
307 raw<- readCtDataDav(files = as.vector(files$sampleName), header=TRUE, format="EDS", column.info=columns, path = path,sample.info=phenoData,n.features = as.numeric(nfeatures)) | |
308 }, | |
309 "plain"={ | |
310 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment")) | |
311 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata) | |
312 rownames(phenoData)=as.vector(files$sampleName) | |
313 raw<- readCtDataDav(files = as.vector(files$sampleName), header=FALSE, format="plain", path = path, sample.info=phenoData,n.features = as.numeric(nfeatures)) | |
314 }, | |
315 "SDS"={ | |
316 columns<- list(feature=3, Ct=6, flag=11) | |
317 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment")) | |
318 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata) | |
319 rownames(phenoData)=as.vector(files$sampleName) | |
320 raw<- readCtDataDav(files = files$sampleName, format="SDS",column.info=columns, path = path, sample.info=phenoData, n.features=as.numeric(nfeatures)) | |
321 }, | |
322 "LightCycler"={ | |
323 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment")) | |
324 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata) | |
325 rownames(phenoData)=as.vector(files$sampleName) | |
326 raw <- readCtDataDav(files = files$sampleName, path = path, format = "LightCycler", sample.info=phenoData,n.features = as.numeric(nfeatures)) | |
327 }, | |
328 "CFX"={ | |
329 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment")) | |
330 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata) | |
331 rownames(phenoData)=as.vector(files$sampleName) | |
332 raw <- readCtDataDav(files = files$sampleName, path = path, format = "CFX", sample.info=phenoData,n.features = as.numeric(nfeatures)) | |
333 }, | |
334 "OpenArray"={ | |
335 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment")) | |
336 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata) | |
337 rownames(phenoData)=as.vector(files$sampleName) | |
338 raw <- readCtDataDav(files = files$sampleName, path = path, format = "OpenArray", sample.info=phenoData,n.features = as.numeric(nfeatures)) | |
339 }, | |
340 "BioMark"={ | |
341 metadata <- data.frame(labelDescription = c("sampleName", "Treatment"), row.names = c("sampleName", "Treatment")) | |
342 phenoData <- new("AnnotatedDataFrame", data = files, varMetadata = metadata) | |
343 rownames(phenoData)=as.vector(files$sampleName) | |
344 raw <- readCtDataDav(files = files$sampleName, path = path, format = "BioMark", sample.info=phenoData, n.features = as.numeric(nfeatures)) | |
345 }, | |
346 stop("Enter something that switches me!") | |
347 ) | |
348 cat("\n Files read correctly! ") | |
349 | |
350 write.table(exprs(raw), file=rawout, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t") | |
351 #################################################################################################################### | |
352 #Set a new categories for the values meeting two criterions | |
353 switch(format, | |
354 "EDS"={ | |
355 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL) | |
356 }, | |
357 "plain"={ | |
358 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Flagged", quantile=NULL) | |
359 }, | |
360 "SDS"={ | |
361 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="TRUE", quantile=NULL) | |
362 }, | |
363 "LightCycler"={ | |
364 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL) | |
365 }, | |
366 "CFX"={ | |
367 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL) | |
368 }, | |
369 "OpenArray"={ | |
370 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="TRUE ", quantile=NULL) | |
371 }, | |
372 "BioMark"={ | |
373 unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Fail", quantile=NULL) | |
374 }, | |
375 stop("Enter something that switches me!") | |
376 ) | |
377 #unreliable<-setCategory(raw, Ct.max=dcCtmax, Ct.min=dcCtmin,replicates=FALSE, flag=dcflag, flag.out="Y", quantile=NULL) | |
378 | |
379 #################################################################################################################### | |
380 #Filter out the values of the new category | |
381 xFilter <- filterCategory(unreliable) | |
382 | |
383 cat("\n Categorization completed! ") | |
384 | |
385 png(x, # create PNG for the heat map | |
386 width = 10*300, # 5 x 300 pixels | |
387 height = 10*300, | |
388 res = 300, # 300 pixels per inch | |
389 pointsize = 8) | |
390 plotCtBoxes(xFilter, stratify=NULL, xlab = "Samples", ylab="Ct", names=as.character(seq(1, ncol(xFilter), 1))) # smaller font size | |
391 dev.off() | |
392 | |
393 #write.table(exprs(xFilter), file=x, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t") | |
394 | |
395 #################################################################################################################### | |
396 #NORMALIZATION | |
397 #method version 3.5.1 + Global mean | |
398 normalizeCtDataDav <- | |
399 function(q, | |
400 norm = "deltaCt", | |
401 deltaCt.genes = NULL, | |
402 scale.rank.samples, | |
403 rank.type = "pseudo.median", | |
404 Ct.max = dcCtmax, | |
405 geo.mean.ref, | |
406 verbose = TRUE) | |
407 { | |
408 # Extract the data | |
409 data <- exprs(q) | |
410 data.norm <- data | |
411 # Get the normalisation method | |
412 method <- match.arg(norm, c("quantile", "scale.rankinvariant", "norm.rankinvariant", "deltaCt", "geometric.mean", "globalmean")) | |
413 # Some general stuff that will be used by both rank.invariant methods | |
414 if (method %in% c("scale.rankinvariant", "norm.rankinvariant")) { | |
415 # Index to use for too high Ct values | |
416 #Ct.index <- data>Ct.max | |
417 data.Ctmax <- data | |
418 Ct.index <- is.na(data) | |
419 #data.Ctmax[Ct.index] <- NA | |
420 data<-na.spline(data) | |
421 # Define what to rank against | |
422 if (rank.type=="pseudo.median") { | |
423 ref.data <- apply(data.Ctmax, 1, median, na.rm=TRUE) | |
424 } else if (rank.type=="pseudo.mean") { | |
425 ref.data <- apply(data.Ctmax, 1, mean, na.rm=TRUE) | |
426 } | |
427 # Mark + replace NA values with something temporary | |
428 na.index <- is.na(ref.data) | |
429 ref.data[na.index] <- 30 | |
430 # Run the rank.invariant function | |
431 data.rankinvar <- apply(data, 2, normalize.invariantset, ref=ref.data) | |
432 } | |
433 # The actual normalisation | |
434 switch(method, | |
435 quantile = { | |
436 # Use an internal limma function | |
437 data.norm <- normalizeQuantiles(data) | |
438 }, | |
439 scale.rankinvariant = { | |
440 # Get all the rank invariant genes | |
441 ri.genes <- sapply(data.rankinvar, "[[", "i.set") | |
442 # Remove those with too high Ct values | |
443 ri.genes[Ct.index] <- FALSE | |
444 # Remove those that were all NA for potentially other reasons | |
445 ri.genes[na.index,] <- FALSE | |
446 # Select those to use here | |
447 ri.count <- rowSums(ri.genes) | |
448 if (missing(scale.rank.samples)) | |
449 scale.rank.samples <- ncol(data)-1 | |
450 ri.index <- ri.count >= scale.rank.samples | |
451 if (sum(ri.index)==0) | |
452 stop(paste("No rank invariant genes were found across", scale.rank.samples, "samples")) | |
453 # Extract the corresponding Ct values; average | |
454 ri.mean <- colMeans(data[ri.index,,drop=FALSE]) | |
455 ri.scale <- ri.mean/ri.mean[1] | |
456 # Correct the data | |
457 data.norm <- t(t(data)*ri.scale) | |
458 # Print info | |
459 if (verbose) { | |
460 cat(c("Scaling Ct values\n\tUsing rank invariant genes:", paste(featureNames(q)[ri.index], collapse=" "), "\n")) | |
461 cat(c("\tScaling factors:", format(ri.scale, digits=3), "\n")) | |
462 } | |
463 }, | |
464 norm.rankinvariant = { | |
465 # Print info | |
466 if (verbose) | |
467 cat("Normalizing Ct values\n\tUsing rank invariant genes:\n") | |
468 # Correct the data based on the calculations above | |
469 for (i in 1:ncol(data)) { | |
470 # Check if there are any rank invariant genes | |
471 ri.sub <- data.rankinvar[[i]] | |
472 ri.genes <- ri.sub[["i.set"]] | |
473 # Remove those that don't pass the Ct.max criteria | |
474 ri.genes[Ct.index[,i]] <- FALSE | |
475 # Remove those that are NA for other reasons | |
476 ri.genes[na.index] <- FALSE | |
477 if (sum(ri.genes)==0) { | |
478 warning(paste("\tNo rank invariant genes were found for sample ", sampleNames(q)[i], "; sample not normalized\n", sep="")) | |
479 next | |
480 } | |
481 # If verbose, print some info | |
482 if (verbose) | |
483 cat(paste("\t", sampleNames(q)[i], ": ", sum(ri.genes), " rank invariant genes\n", sep="")) | |
484 # The actual correction | |
485 data.norm[,i] <- as.numeric(approx(ri.sub$n.curve$y, ri.sub$n.curve$x, xout=data[,i], rule=2)$y) | |
486 } | |
487 }, | |
488 deltaCt = { | |
489 # Which are the reference genes (endogenous controls) | |
490 if (is.null(deltaCt.genes)) | |
491 deltaCt.genes <- unique(featureNames(q)[featureType(q)=="Endogenous Control"]) | |
492 c.index <- featureNames(q) %in% deltaCt.genes | |
493 if (verbose) { | |
494 cat(c("Calculating deltaCt values\n\tUsing control gene(s):", paste(deltaCt.genes, collapse=" "), "\n")) | |
495 } | |
496 # Run though all cards; perform internal normalisation | |
497 for (c in 1:ncol(data)) { | |
498 # Calculate the control genes | |
499 refCt <- mean(data[c.index,c], na.rm=TRUE) | |
500 refsd <- sd(data[c.index,c], na.rm=TRUE) | |
501 # Difference for target and controls | |
502 data.norm[,c] <- data[,c]-refCt | |
503 # Print results | |
504 if (verbose) | |
505 cat(paste("\tCard ", c, ":\tMean=", format(refCt, dig=4), "\tStdev=", format(refsd, dig=3), "\n", sep="")) | |
506 } | |
507 }, | |
508 geometric.mean = { | |
509 # For each column, calculate the geometric mean of Ct values<Ct.max | |
510 #geo.mean <- apply(data, 2, function(x) { | |
511 # xx <- log2(subset(x, x<Ct.max)) | |
512 # 2^mean(xx)}) | |
513 geo.mean <- apply(data, 2, function(x) { | |
514 xx <- subset(x, x<=Ct.max) | |
515 geometric.mean(xx)}) | |
516 # Which sample to scale to | |
517 #if (missing(geo.mean.ref)) | |
518 # geo.mean.ref <- 1 | |
519 # Calculate the scaling factor | |
520 #geo.scale <- geo.mean/geo.mean[geo.mean.ref] | |
521 # Adjust the data accordingly | |
522 data.norm <- t(t(data) - geo.mean) | |
523 if (verbose) { | |
524 cat(c("Scaling Ct values\n\tUsing geometric mean within each sample\n")) | |
525 #cat(c("\tScaling factors:", format(geo.scale, digits=3), "\n")) | |
526 } | |
527 } # switch | |
528 ,globalmean = { | |
529 glo <- apply(data, 2, function(x) { | |
530 xx <- subset(x, x <= Ct.max) | |
531 mean(xx) | |
532 }) | |
533 data.norm <- t(t(data) - glo) | |
534 } | |
535 ) | |
536 # Replace with the normalised Ct exprs | |
537 exprs(q) <- data.norm | |
538 # Add to the history of the object | |
539 if (nrow(getCtHistory(q))==0) | |
540 setCtHistory(q) <- data.frame(history="Manually created qPCRset object.", stringsAsFactors=FALSE) | |
541 setCtHistory(q) <- rbind(getCtHistory(q), capture.output(match.call(normalizeCtData))) | |
542 # Return the normalised object | |
543 q | |
544 } | |
545 | |
546 | |
547 switch(normalizationMethod, | |
548 "deltaCt"={ | |
549 normalizedDataset <- normalizeCtDataDav(xFilter, norm="deltaCt", deltaCt.genes =explode(normalizers, sep = ",")) | |
550 }, | |
551 "quantile"={ | |
552 normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod) | |
553 }, | |
554 "scale.rankinvariant"={ | |
555 normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod) | |
556 }, | |
557 "norm.rankinvariant"={ | |
558 normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod) | |
559 }, | |
560 "geometric.mean"={ | |
561 normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod) | |
562 }, | |
563 "globalmean"={ | |
564 normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod) | |
565 }, | |
566 stop("Enter something that switches me!") | |
567 ) | |
568 | |
569 #if (normalizationMethod=="deltaCt") { | |
570 #normalize CT data | |
571 | |
572 #normalizedDataset <- normalizeCtDataDav(xFilter, norm="deltaCt", deltaCt.genes =explode(normalizers, sep = ",")) | |
573 #} else { | |
574 #normalizedDataset <- normalizeCtDataDav(xFilter, norm=normalizationMethod) | |
575 | |
576 #} | |
577 cat("\n Data normalized correctly! \n") | |
578 write.table(exprs(normalizedDataset), file=outputNorm, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t") | |
579 | |
580 | |
581 #normalizedDataset | |
582 #################################################################################################################### | |
583 #Check noise reduction by empirical cumulative distribution | |
584 | |
585 #X = rnorm(100) # X is a sample of 100 normally distributed random variables | |
586 # P = ecdf(X) # P is a function giving the empirical CDF of X | |
587 #Y = rnorm(1000) # X is a sample of 100 normally distributed random variables | |
588 # PY = ecdf(Y) | |
589 #plotâ„— | |
590 | |
591 #lines(PY) | |
592 png(outputECDF, # create PNG for the heat map | |
593 width = 10*300, # 5 x 300 pixels | |
594 height = 10*300, | |
595 res = 300, # 300 pixels per inch | |
596 pointsize = 8) # smaller font size | |
597 vec=c() | |
598 for (i in 1:nrow(exprs(xFilter))){ | |
599 CVX<-(sd(2^-exprs(xFilter)[i,], na.rm = TRUE)/mean(2^-exprs(xFilter)[i,], na.rm = TRUE))*100 | |
600 vec=c(vec, c(CVX)) | |
601 } | |
602 vec<-na.omit(vec) | |
603 P = ecdf(vec) | |
604 gm=c() | |
605 for (i in 1:nrow(exprs(normalizedDataset))){ | |
606 CVGM<-(sd(2^-exprs(normalizedDataset)[i,], na.rm = TRUE)/mean(2^-exprs(normalizedDataset)[i,], na.rm = TRUE))*100 | |
607 gm=c(gm, c(CVGM)) | |
608 } | |
609 gm<-na.omit(gm) | |
610 | |
611 PY = ecdf(gm) | |
612 | |
613 plot_colors <- c(rgb(r=0.0,g=0.0,b=0.9), "red", "forestgreen",rgb(r=0.0,g=0.0,b=0.0),rgb(r=0.5,g=0.0,b=0.3),rgb(r=0.0,g=0.4,b=0.4)) | |
614 | |
615 plot(P,col=plot_colors[1],xlim=c(0.0,600), ylim=c(0.0,1),xaxp = c(0.0, 600, 6),yaxp = c(0.0, 1, 10), cex=1.3, lwd=5, main=NULL,xlab="CV(%)",ylab="Empirical Cumulative Distribution") | |
616 lines(PY, lwd=5, col=plot_colors[6],cex=1.3) | |
617 legend("bottomright", c("not normalized", "normalized"), cex=1.3, col=c(plot_colors[1],plot_colors[6]), lwd=c(5,5)); | |
618 dev.off() | |
619 | |
620 #Two-sample Kolmogorov-Smirnov | |
621 ks.test(vec,gm) | |
622 | |
623 | |
624 #write.table(exprs(qFiltNAs), file=outputIMP, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t") | |
625 png(outputIMP, # create PNG for the heat map | |
626 width = 10*300, # 5 x 300 pixels | |
627 height = 10*300, | |
628 res = 300, # 300 pixels per inch | |
629 pointsize = 8) | |
630 plotCtBoxes(normalizedDataset, stratify=NULL, xlab = "Samples", ylab="DeltaCt", names=as.character(seq(1, ncol(normalizedDataset), 1))) # smaller font size | |
631 dev.off() | |
632 | |
633 ################################################## Filtering based on number of NAs################################################## | |
634 | |
635 #FILTERING on the basis of NAs | |
636 #qPCRset.R | |
637 setMethod("exprs", signature(object="qPCRset"), definition = | |
638 function (object) {x <- assayDataElement(object, "exprs"); rownames(x) <- featureNames(object); colnames(x) <- sampleNames(phenoData(object));x} | |
639 ) | |
640 ncatn <- as.integer(n.samples(normalizedDataset))*as.integer(percentofnastoremove)/100 | |
641 | |
642 qFiltNAs <- filterCtData(normalizedDataset, remove.category=c("Undetermined","Unreliable"), n.category=as.integer(ncatn),remove.name=explode(filtnames, sep = ",")) | |
643 | |
644 cat("\n Data filtered correctly! \n") | |
645 | |
646 if (is.na(exprs(qFiltNAs))){ | |
647 switch(imputeMethod, | |
648 "knn"={ | |
649 imp<-impute.knn(exprs(qFiltNAs) ,k = as.integer(kappa), maxp = as.integer(macsp), rng.seed=362436069) | |
650 exprs(qFiltNAs)=imp$data | |
651 }, | |
652 "mestdagh"={ | |
653 #Mesdagh | |
654 #sostituisce a NA -1000 | |
655 for (i in 1:nrow(exprs(qFiltNAs))){ | |
656 for(j in 1:ncol(exprs(qFiltNAs))){ | |
657 if(is.na(exprs(qFiltNAs)[i,j])>0)exprs(qFiltNAs)[i,j]<- -1000 | |
658 } | |
659 } | |
660 temp<-exprs(qFiltNAs) | |
661 for (i in 1:nrow(exprs(qFiltNAs))){ | |
662 for(j in 1:ncol(exprs(qFiltNAs))){ | |
663 if(exprs(qFiltNAs)[i,j]<(-100))exprs(qFiltNAs)[i,j]<- max(temp[i,])+1 | |
664 } | |
665 } | |
666 }, | |
667 "cubic"={ | |
668 exprs(qFiltNAs) <- na.spline(exprs(qFiltNAs)) | |
669 }, | |
670 "mean"={ | |
671 exprs(qFiltNAs)<-impute(exprs(qFiltNAs),mean) | |
672 }, | |
673 "median"={ | |
674 exprs(qFiltNAs)<-impute(exprs(qFiltNAs),median) | |
675 }, | |
676 "nondetects"={ | |
677 qFiltNAs <- qpcrImpute(qFiltNAs, outform=c("Single"),linkglm = c("logit")) | |
678 }, | |
679 stop("Enter something that switches me!") | |
680 ) | |
681 | |
682 cat("\n Imputation completed! \n") | |
683 }else{ | |
684 cat("\n Nothing to impute! \n") | |
685 } | |
686 | |
687 write.table(exprs(qFiltNAs), file=outputRemaining, quote=FALSE, row.names=TRUE, col.names=TRUE, sep = "\t") | |
688 | |
689 if (DEAMethod=="ttest") { | |
690 #Differential expression analysis (paired t test+BH). Returns Fold change in linear scale. | |
691 DEG<-ttestCtData(qFiltNAs, groups = files$Treatment, alternative = alternative, paired = ifelse(paired=="TRUE", TRUE, FALSE), replicates =ifelse(replicates=="TRUE", TRUE, FALSE), sort=ifelse(sort=="TRUE", TRUE, FALSE), stringent=ifelse(stringent=="TRUE", TRUE, FALSE), p.adjust=padjust) | |
692 write.table(DEG, file=outputDEA, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t") | |
693 } | |
694 if (DEAMethod=="rp") { | |
695 DEG<-RP(exprs(qFiltNAs), as.numeric(pData(qFiltNAs)$Treatment)-1, num.perm = 1000,logged = TRUE, gene.names = featureNames(qFiltNAs), huge=TRUE, plot = FALSE, rand = 123) | |
696 write.table(DEG[1:5], file=outputDEA, quote=FALSE, row.names=TRUE, col.names=TRUE,sep = "\t") | |
697 } | |
698 cat("\n Differential expression analysis completed correctly! \n") | |
699 cat("\n Workflow ended correctly! \n") |