Mercurial > repos > cristian > notos
comparison KDEanalysis.r @ 0:1535ffddeff4 draft
planemo upload commit a7ac27de550a07fd6a3e3ea3fb0de65f3a10a0e6-dirty
author | cristian |
---|---|
date | Thu, 07 Sep 2017 08:51:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1535ffddeff4 |
---|---|
1 # Carry out analysis of CpGo/E data for Galaxy module | |
2 # Ingo Bulla | |
3 # 27 Jan 16 | |
4 | |
5 # load packages | |
6 pckg <- c("methods", "optparse") | |
7 for (p in pckg) { | |
8 if (!(p %in% rownames(installed.packages()))) { | |
9 stop( paste("R package", p , "is not installed"), call. = FALSE) | |
10 } | |
11 } | |
12 require(methods, quietly = TRUE) | |
13 require(optparse, quietly = TRUE) | |
14 | |
15 # determine directory where functions are located | |
16 cmdArgs <- commandArgs(trailingOnly = FALSE) | |
17 str <- "--file=" | |
18 match <- grep(str, cmdArgs) | |
19 if (length(match) == 0) { | |
20 stop("notos.r not set up to be called from R console") | |
21 } | |
22 path <- normalizePath( sub(str, "", cmdArgs[match]) ) | |
23 FCTN.DIR <- file.path(dirname(path), "Functions") | |
24 | |
25 source( file.path( FCTN.DIR, "Kernel_function_form.R") ) | |
26 | |
27 | |
28 MAX.CPGOE <- 10 # maximum value for CpGo/e ratios | |
29 | |
30 | |
31 # process outliers and return quantities characterizing the distribution | |
32 # obs: CpGo/e ratios; frac.outl: maximum fraction of CpGo/e ratios that may be excluded as outliers | |
33 proc.outliers <- function(obs, frac.outl) { | |
34 ret <- list() | |
35 | |
36 # remove all zeros from sample | |
37 no.obs.raw <- length(obs) | |
38 ret[["prop.zero"]] <- sum(obs == 0) / no.obs.raw | |
39 obs <- obs[obs != 0] | |
40 if (length(obs) < 3) { | |
41 ret[["valid"]] <- FALSE | |
42 return(ret) | |
43 } | |
44 ret[["obs.nz"]] <- obs | |
45 | |
46 # replace very large values by a maximum value | |
47 obs <- sapply(obs, function(x) min(x, MAX.CPGOE)) | |
48 | |
49 # defining variables | |
50 # ... mean, median and standard deviation | |
51 ret[["mu.obs"]] <- mu.obs <- mean(obs) | |
52 ret[["me.obs"]] <- me.obs <- median(obs) | |
53 sd.obs <- sd(obs) | |
54 iqr.obs <- IQR(obs) | |
55 | |
56 # ... upper and lower limits, based on mean +- k * sd and on Q1 - k * iqr / Q3 + k * iqr, k = 2, ..., 5 | |
57 ul.mu <- mu.obs + (2 : 5) * sd.obs | |
58 ll.mu <- mu.obs - (2 : 5) * sd.obs | |
59 ul.me <- quantile(obs, 0.75) + (2 : 5) * iqr.obs | |
60 ll.me <- quantile(obs, 0.25) - (2 : 5) * iqr.obs | |
61 names(ul.mu) <- names(ll.mu) <- 2 : 5 | |
62 names(ul.me) <- names(ll.me) <- 2 : 5 | |
63 ret[["ul.mu"]] <- ul.mu | |
64 ret[["ll.mu"]] <- ll.mu | |
65 ret[["ul.me"]] <- ul.me | |
66 ret[["ll.me"]] <- ll.me | |
67 | |
68 # summary statistics and data output | |
69 # ... calculate proportion of data excluded when using different ranges | |
70 ret[["prop2"]] <- prop2 <- length(obs[obs < ll.me["2"] | ul.me["2"] < obs]) / no.obs.raw | |
71 ret[["prop3"]] <- prop3 <- length(obs[obs < ll.me["3"] | ul.me["3"] < obs]) / no.obs.raw | |
72 ret[["prop4"]] <- prop4 <- length(obs[obs < ll.me["4"] | ul.me["4"] < obs]) / no.obs.raw | |
73 ret[["prop5"]] <- prop5 <- length(obs[obs < ll.me["5"] | ul.me["5"] < obs]) / no.obs.raw | |
74 # ... choose the smallest k in Q1 / Q3 -+ k * IQR such that less than frac.outl (default 1%) of the data are excluded | |
75 v <- c(prop2, prop3, prop4, prop5) < frac.outl | |
76 | |
77 if (any(v)) { | |
78 excl.crit <- min(which(v)) | |
79 ret[["obs.cl"]] <- obs[!(obs < ll.me[excl.crit] | ul.me[excl.crit] < obs)] | |
80 ret[["used"]] <- paste(2 : 5, "iqr", sep = "")[excl.crit] | |
81 } else { | |
82 ret[["obs.cl"]] <- obs[!(obs < ll.me[4] | ul.me[4] < obs)] | |
83 ret[["used"]] <- "limited to 5 * iqr" | |
84 } | |
85 ret[["valid"]] <- TRUE | |
86 return(ret) | |
87 } | |
88 | |
89 | |
90 # Read CpGo/e ratios from file | |
91 # warn: meant to control whether warnings are issued; currently not used inside the function | |
92 read.CpGoe <- function(fname, warn) { | |
93 # read input file as a whitespace-separated table with the two columns "seq" and "val" (short rows are padded, fill = TRUE) | |
94 # ... use the "val" column as CpGo/e ratios and drop missing values | |
95 print(fname) | |
96 v <- read.table(fname, fill = TRUE, col.names = c("seq", "val")) | |
97 obs <- v$val | |
98 | |
99 obs <- obs[!is.na(obs)] | |
100 return(obs) | |
101 } | |
102 | |
103 | |
104 # process command line arguments | |
105 # expected arguments: | |
106 # - names of the species (each as a separate argument) | |
107 # - names of CpGo/e files of the species (each as a separate argument) | |
108 # ... parse arguments | |
109 option_list <- list(make_option(c("-o", "--frac-outl"), type = "double", default = 0.01, | |
110 help = "maximum fraction of CpGo/e ratios excluded as outliers [default %default]"), | |
111 make_option(c("-d", "--min-dist"), type = "double", default = 0.2, | |
112 help = "minimum distance between modes, modes that are closer are joined [default %default]"), | |
113 make_option(c("-c", "--conf-level"), type = "double", default = 0.95, | |
114 help = "level of the confidence intervals of the mode positions [default %default]"), | |
115 make_option(c("-m", "--mode-mass"), type = "double", default = 0.05, | |
116 help = "minimum probability mass of a mode [default %default]"), | |
117 make_option(c("-b", "--band-width"), type = "double", default = 1.06, | |
118 help = "bandwidth constant for kernels [default %default]"), | |
119 make_option(c("-B", "--bootstrap"), action="store_true", default = FALSE, | |
120 help = "calculate confidence intervals of mode positions using bootstrap [default %default]"), | |
121 make_option(c("-r", "--bootstrap-reps"), type = "integer", default = 1500, | |
122 help = "number of bootstrap repetitions [default %default]"), | |
123 make_option(c("-H", "--outlier-hist-file"), type = "character", default = "outliers_hist.pdf", | |
124 help = "name of the output file for the outlier histograms [default %default]"), | |
125 make_option(c("-C", "--cutoff-file"), type = "character", default = "outliers_cutoff.csv", | |
126 help = "name of the output file for the outlier cutoff [default %default]"), | |
127 make_option(c("-k", "--kde-file"), type = "character", default = "KDE.pdf", | |
128 help = "name of the output file for the KDE [default %default]"), | |
129 make_option(c("-v", "--valley-file"), type = "character", default = "valleys.csv", | |
130 help = "name of the output file with the values for valleys of the KDE [default %default]"), | |
131 make_option(c("-p", "--peak-file"), type = "character", default = "modes_basic_stats.csv", | |
132 help = "name of the output file describing the peaks of the KDE [default %default]"), | |
133 make_option(c("-s", "--bootstrap-file"), type = "character", default = "modes_bootstrap.csv", | |
134 help = "name of the output file for the bootstrap results [default %default]"), | |
135 make_option(c("-u", "--summary-file"), type = "character", default = "summary.csv", | |
136 help = "name of the summary file for the KDE results [default %default]"), | |
137 make_option(c("-f", "--no-warning-few-seqs"), action = "store_true", default = FALSE, | |
138 help = paste("suppress warning in case the input file only contains few values ", | |
139 "[default %default]", sep = ""))) | |
140 | |
141 op <- OptionParser(usage = "notos.r [options] spc_name_1 ... spc_name_N CpGoe_file_name_1 ... CpGoe_file_name_N", | |
142 description = paste("\nDescription: Notos generates a histogram and a kernel density estimator from files containing CpGo/e ratios. ", | |
143 "Moreover, it determines the number of modes of the CpGo/e ratio for each input file. The input files ", | |
144 "can either be composed of \n", | |
145 "1) CpGo/e ratios separated by linebreaks or\n", | |
146 "2) sequence names and CpGo/e ratios with each sequence name put on a separate line together with its CpGo/e ratio ", | |
147 "and sequence and CpGo/e being separated by whitespaces on each line.", sep = ""), | |
148 option_list = option_list) | |
149 args <- parse_args(op, positional_arguments = c(2, Inf)) | |
150 num.args <- length(args$args) | |
151 use.bstrp <- args$options$`bootstrap` | |
152 supp.warn.few <- args$options$`no-warning-few-seqs` | |
153 | |
154 | |
155 # ... check number of arguments | |
156 # ... ... check number of mandatory arguments | |
157 if (num.args < 2) { | |
158 stop("One species name and one file containing CpGo/e ratios have to be provided") | |
159 } | |
160 | |
161 # ... ... check whether number of mandatory arguments is even | |
162 if (num.args %% 2 != 0) { | |
163 stop("Number of arguments has to be even") | |
164 } | |
165 | |
166 # ... ... check maximum fraction of CpGo/e ratios excluded as outliers | |
167 frac.outl <- args$options$`frac-outl` | |
168 if ((frac.outl <= 0) || (frac.outl >= 1)) { | |
169 stop("The maximum fraction of CpGo/e ratios excluded as outliers has to be greater than zero and less than one") | |
170 } | |
171 if (frac.outl >= 0.2) { | |
172 warning("The maximum fraction of CpGo/e ratios excluded as outliers has been set to a rather large value, resulting in the removal of many CpGo/e ratios") | |
173 } | |
174 | |
175 | |
176 # ... check numerical arguments | |
177 # ... ... check minimum distance between modes | |
178 min.dist <- args$options$`min-dist` | |
179 if (min.dist < 0) { | |
180 stop("The minimum distance between modes has to be equal to or larger than zero") | |
181 } | |
182 if (min.dist >= 0.4) { | |
183 warning("The minimum distance between modes has been set to a rather large value, resulting in a strong reduction of the number of modes") | |
184 } | |
185 | |
186 # ... ... check confidence level | |
187 conf.lev <- args$options$`conf-level` | |
188 if ((conf.lev <= 0) || (conf.lev >= 1)) { | |
189 stop("The level of the confidence intervals of the mode positions has to be larger than zero and smaller than one.") | |
190 } | |
191 if (conf.lev >= 0.995) { | |
192 warning("The level of the confidence intervals of the mode positions has been set to a rather high value, resulting in very broad confidence intervals") | |
193 } | |
194 | |
195 # ... ... check minimum probability mass of a mode | |
196 mode.mass <- args$options$`mode-mass` | |
197 if ((mode.mass < 0) || (mode.mass >= 1)) { | |
198 stop("The minimum probability mass of a mode has to be larger than or equal to zero and smaller than one.") | |
199 } | |
200 if (mode.mass >= 0.3) { | |
201 warning("The minimum probability mass of a mode has been set to a rather large value, resulting in the elemination of a high number of modes.") | |
202 } | |
203 | |
204 # ... ... check bandwidth constant | |
205 band.width <- args$options$`band-width` | |
206 if (band.width <= 0) { | |
207 stop("The bandwidth constant has to be positive") | |
208 } | |
209 if (band.width >= 5) { | |
210 warning("The bandwidth constant has to been set to a rather large value, resulting in a strong smoothing") | |
211 } | |
212 | |
213 # ... ... check number of bootstrap repetitions | |
214 bstrp.reps <- args$options$`bootstrap-reps` | |
215 if (bstrp.reps != round(bstrp.reps)) { | |
216 stop("The number of boostrap repetitions has to be a positive integer") | |
217 } | |
218 if (bstrp.reps <= 0) { | |
219 stop("The number of boostrap repetitions has to be positive") | |
220 } | |
221 if (bstrp.reps >= 10000) { | |
222 warning("The number of boostrap repetitions has been set to a rather large value, resulting in a long running time") | |
223 } | |
224 | |
225 # ... check file name arguments | |
226 # ... ... check histogram output file name | |
227 outlier.hist.fname <- args$options$`outlier-hist-file` | |
228 if ( file.exists(outlier.hist.fname) && (file.info(outlier.hist.fname)$isdir) ) { | |
229 stop(paste("File name for the outlier histogram output refers to a directory:", outlier.hist.fname)) | |
230 } | |
231 v <- strsplit(outlier.hist.fname, split = ".", fixed = TRUE)[[1]] | |
232 if ((length(v) == 1) || (v[ length(v) ] != "pdf")) { | |
233 warning(paste("File name for the outlier histogram output does not have a .pdf extension:", outlier.hist.fname)) | |
234 } | |
235 g <- gregexpr(pattern ='/', outlier.hist.fname)[[1]] | |
236 if (as.vector(g)[1] != -1) { | |
237 v <- as.vector(g) | |
238 d <- substr(outlier.hist.fname, 1, v[length(v)]) | |
239 if (!file.exists(d)) { | |
240 stop(paste("Path to file for the outlier histogram output is not valid:", outlier.hist.fname)) | |
241 } | |
242 } | |
243 | |
244 # ... ... check outlier cutoff output file name | |
245 cutoff.fname <- args$options$`cutoff-file` | |
246 if ( file.exists(cutoff.fname) && (file.info(cutoff.fname)$isdir) ) { | |
247 stop(paste("File name for the outlier cutoff table output refers to a directory:", cutoff.fname)) | |
248 } | |
249 v <- strsplit(cutoff.fname, split = ".", fixed = TRUE)[[1]] | |
250 if (length(v) == 1) { | |
251 stop(paste("File name for the outlier cutoff table output does not have a file extension:", cutoff.fname)) | |
252 } | |
253 #if (v[ length(v) ] != "xlsx") { | |
254 # warning(paste("File name for the outlier cutoff table output does not have a .xlsx extension:", cutoff.fname)) | |
255 #} | |
256 g <- gregexpr(pattern ='/', cutoff.fname)[[1]] | |
257 if (as.vector(g)[1] != -1) { | |
258 v <- as.vector(g) | |
259 d <- substr(cutoff.fname, 1, v[length(v)]) | |
260 if (!file.exists(d)) { | |
261 stop(paste("Path to file for the outlier cutoff is not valid:", cutoff.fname)) | |
262 } | |
263 } | |
264 | |
265 # ... ... check KDE output file name | |
266 kde.fname <- args$options$`kde-file` | |
267 if ( file.exists(kde.fname) && (file.info(kde.fname)$isdir) ) { | |
268 stop(paste("File name for the KDE output refers to a directory:", kde.fname)) | |
269 } | |
270 v <- strsplit(kde.fname, split = ".", fixed = TRUE)[[1]] | |
271 if ((length(v) == 1) || (v[ length(v) ] != "pdf")) { | |
272 warning(paste("File name for the KDE output does not have a .pdf extension:", kde.fname)) | |
273 } | |
274 g <- gregexpr(pattern ='/', kde.fname)[[1]] | |
275 if (as.vector(g)[1] != -1) { | |
276 v <- as.vector(g) | |
277 d <- substr(kde.fname, 1, v[length(v)]) | |
278 if (!file.exists(d)) { | |
279 stop(paste("Path to file for the KDE output is not valid:", kde.fname)) | |
280 } | |
281 } | |
282 | |
283 | |
284 # ... ... check peak descriptives output file name | |
285 peak.fname <- args$options$`peak-file` | |
286 if ( file.exists(peak.fname) && (file.info(peak.fname)$isdir) ) { | |
287 stop(paste("File name for the peak descriptives refers to a directory:", peak.fname)) | |
288 } | |
289 v <- strsplit(peak.fname, split = ".", fixed = TRUE)[[1]] | |
290 if ((length(v) == 1) || (v[ length(v) ] != "csv")) { | |
291 warning(paste("File name for the peak descriptives does not have a .csv extension:", peak.fname)) | |
292 } | |
293 g <- gregexpr(pattern ='/', peak.fname)[[1]] | |
294 if (as.vector(g)[1] != -1) { | |
295 v <- as.vector(g) | |
296 d <- substr(peak.fname, 1, v[length(v)]) | |
297 if (!file.exists(d)) { | |
298 stop(paste("Path to file for the peak descriptives is not valid:", peak.fname)) | |
299 } | |
300 } | |
301 | |
302 # ... ... check bootstrap results output file name | |
303 bstrp.fname <- args$options$`bootstrap-file` | |
304 if ( file.exists(bstrp.fname) && (file.info(bstrp.fname)$isdir) ) { | |
305 stop(paste("File name for the bootstrap results refers to a directory:", bstrp.fname)) | |
306 } | |
307 v <- strsplit(bstrp.fname, split = ".", fixed = TRUE)[[1]] | |
308 if ((length(v) == 1) || (v[ length(v) ] != "csv")) { | |
309 warning(paste("File name for the bootstrap results does not have a .csv extension:", bstrp.fname)) | |
310 } | |
311 g <- gregexpr(pattern ='/', bstrp.fname)[[1]] | |
312 if (as.vector(g)[1] != -1) { | |
313 v <- as.vector(g) | |
314 d <- substr(bstrp.fname, 1, v[length(v)]) | |
315 if (!file.exists(d)) { | |
316 stop(paste("Path to file for the bootstrap results is not valid:", bstrp.fname)) | |
317 } | |
318 } | |
319 | |
320 # ... ... check summary results output file name | |
321 summ.fname <- args$options$`summary-file` | |
322 if ( file.exists(summ.fname) && (file.info(summ.fname)$isdir) ) { | |
323 stop(paste("File name for the bootstrap results refers to a directory:", summ.fname)) | |
324 } | |
325 v <- strsplit(summ.fname, split = ".", fixed = TRUE)[[1]] | |
326 if ((length(v) == 1) || (v[ length(v) ] != "csv")) { | |
327 warning(paste("File name for the bootstrap results does not have a .csv extension:", summ.fname)) | |
328 } | |
329 g <- gregexpr(pattern ='/', summ.fname)[[1]] | |
330 if (as.vector(g)[1] != -1) { | |
331 v <- as.vector(g) | |
332 d <- substr(summ.fname, 1, v[length(v)]) | |
333 if (!file.exists(d)) { | |
334 stop(paste("Path to file for the bootstrap results is not valid:", summ.fname)) | |
335 } | |
336 } | |
337 | |
338 | |
339 # ... ... check CpGo/e input file names | |
340 num.spec <- num.args / 2 | |
341 spec.names <- args$args[1:num.spec] | |
342 cpgoe.fnames <- args$args[(num.spec + 1):num.args] | |
343 for (i in 1:length(cpgoe.fnames)) { | |
344 if (!file.exists(cpgoe.fnames[i])) { | |
345 stop(paste("CpGo/e file does not exist:", cpgoe.fnames[i])) | |
346 } | |
347 if (file.info(cpgoe.fnames[i])$isdir) { | |
348 stop(paste("CpGo/e file name refers to a directory:", cpgoe.fnames[i])) | |
349 } | |
350 } | |
351 | |
352 valleys.fname <- args$options$`valley-file` | |
353 | |
354 # remove outliers and output histograms | |
355 # ... set up table with cutoff quantities | |
356 tab.des <- data.frame(matrix(NA, nrow = num.spec, ncol = 6)) | |
357 names(tab.des) <- c("prop.zero", "prop.out.2iqr", "prop.out.3iqr", | |
358 "prop.out.4iqr", "prop.out.5iqr", "used") | |
359 rownames(tab.des) <- spec.names | |
360 | |
361 # ... set up figure | |
362 t.height <- 6 | |
363 t.width <- 20 | |
364 pdf(outlier.hist.fname, height = t.height,width = t.width, paper = "special") | |
365 par(mfrow = c(1, 3), mgp = c(2, 0.5, 0), mar = c(4.0, 3.0, 1.5, 1)) | |
366 tmp.fnames <- c() | |
367 | |
368 # ... iterate through species | |
369 for (i in 1:num.spec) { | |
370 fname <- cpgoe.fnames[i] | |
371 obs <- read.CpGoe(fname, TRUE) | |
372 | |
373 | |
374 # check CpGo/e ratios | |
375 for (j in 1:length(obs)) { | |
376 # is format legal? | |
377 val <- as.numeric( obs[j] ) | |
378 err.str <- paste("Observation", i, "in", fname) | |
379 if (!is.finite(val)) { | |
380 stop(paste(err.str, "could not be converted to a number:", obs[j])) | |
381 } | |
382 | |
383 # is ratio too small / large? | |
384 if (val < 0) { | |
385 stop(paste(err.str, "is negative:", val)) | |
386 } else { | |
387 if (val > MAX.CPGOE) { | |
388 warning(paste(err.str , "is suspiciously large:", val, "\nthis value is replaced by", MAX.CPGOE)) | |
389 } | |
390 } | |
391 } | |
392 | |
393 # process outliers and store the results | |
394 obs.org <- obs | |
395 l <- proc.outliers(obs, frac.outl) | |
396 if (!l[["valid"]]) { | |
397 stop( paste("Too few values in", fname, "(less than 3) after removal of zeros"), call. = FALSE ) | |
398 } | |
399 tab.des[i, "prop.zero"] <- l[["prop.zero"]] | |
400 mu.obs <- l[["mu.obs"]] | |
401 me.obs <- l[["me.obs"]] | |
402 ul.mu <- l[["ul.mu"]] | |
403 ll.mu <- l[["ll.mu"]] | |
404 ul.me <- l[["ul.me"]] | |
405 ll.me <- l[["ll.me"]] | |
406 tab.des[i, "prop.out.2iqr"] <- l[["prop2"]] | |
407 tab.des[i, "prop.out.3iqr"] <- l[["prop3"]] | |
408 tab.des[i, "prop.out.4iqr"] <- l[["prop4"]] | |
409 tab.des[i, "prop.out.5iqr"] <- l[["prop5"]] | |
410 obs.cl <- l[["obs.cl"]] | |
411 obs.nz <- l[["obs.nz"]] | |
412 tab.des[i, "used"] <- l[["used"]] | |
413 tab.des[i, "no.obs.raw"] <- length(obs.org) | |
414 tab.des[i, "no.obs.nozero"] <- length(obs.nz) | |
415 tab.des[i, "no.obs.clean"] <- length(obs.cl) | |
416 usedindex <- substr(l[["used"]],1,1) | |
417 # Histograms | |
418 # ... histogram 1: original data with zeros | |
419 t.breaks <- seq(0, max(obs.org) + 1, by = 0.03) | |
420 t.xlim <- c(0, ul.me["5"] + 0.1) | |
421 hist(obs.org, breaks = t.breaks, xlim = t.xlim, xlab = "CpG o/e", main = "", | |
422 sub = "Original data", prob = TRUE, | |
423 col = grey(0.9), border = grey(0.6)) | |
424 mtext(paste(spec.names[i]), side = 3, adj = 0) | |
425 | |
426 | |
427 # ... histogram 2: data without zeros, with median and Q1 / Q3 -+ k * IQR limits | |
428 t.lty <- rep(3, 4) | |
429 t.lty[usedindex] <- 1 | |
430 | |
431 hist(obs.nz, breaks = t.breaks, xlim = t.xlim, xlab = "CpG o/e", main = "", | |
432 sub = "Data without zeros, Q1/3 +- k*IQR, k=2,...,5", prob = TRUE, | |
433 col = grey(0.9), border = grey(0.6)) | |
434 abline(v = me.obs, col = 'blue', lwd = 2) | |
435 abline(v = c(ll.me, ul.me), col = "red", lty = rep(t.lty, 2)) | |
436 | |
437 # ... histogram 3: cleaned data | |
438 hist(obs.cl, breaks = t.breaks, xlim = t.xlim, xlab = "CpG o/e", main = "", | |
439 sub = "Cleaned data", prob = TRUE, | |
440 col = grey(0.9), border = grey(0.6)) | |
441 abline(v = me.obs, col = 'blue', lwd = 2) | |
442 abline(v = c(ll.me[usedindex], ul.me[usedindex]), col = "red") | |
443 } | |
444 invisible(dev.off()) | |
445 | |
446 # output cutoff quantities | |
447 write.table(tab.des, file = cutoff.fname, sep = "\t", col.names=NA) | |
448 | |
449 # plot KDE and output quantities characterizing the peaks and the bootstrap results | |
450 # ... table with quantities characterizing the peaks | |
451 v <- col.names.peaks() | |
452 tab1.m <- data.frame(matrix(NA, nrow = num.spec, ncol = length(v))) | |
453 names(tab1.m) <- col.names.peaks() | |
454 rownames(tab1.m) <- spec.names | |
455 | |
456 # ... table for the bootstrap | |
457 tab2.m <- data.frame(matrix(NA, nrow = num.spec, ncol = 7)) | |
458 names(tab2.m) <- col.names.bs() | |
459 rownames(tab2.m) <- spec.names | |
460 | |
461 # summary table | |
462 sum1.m <- data.frame(matrix(NA, nrow = num.spec, ncol = 13)) | |
463 names(sum1.m) <- c("Modes", "Skewness", "Variance", "Modes too close", "Peak1", "Peak2", "Peak3", "Peak4", "Peak5", "Peak6", "Peak7", "Peak8", "Peak9") | |
464 rownames(sum1.m) <- spec.names | |
465 | |
466 # ... plotting | |
467 t.height <- 6 | |
468 t.width <- 20 | |
469 pdf(kde.fname, height = t.height,width = t.width, paper = "special") | |
470 for (i in 1:num.spec) { | |
471 # read in CpGo/e ratios | |
472 obs <- read.CpGoe(cpgoe.fnames[i], FALSE) | |
473 l <- proc.outliers(obs, frac.outl) | |
474 obs.cl <- l[["obs.cl"]] | |
475 | |
476 # check number of values | |
477 fname <- cpgoe.fnames[i] | |
478 if (length(obs.cl) < 3) { | |
479 stop( paste("Too few values in", fname, "(less than 3) after removal of outliers and zeros"), call. = FALSE ) | |
480 } | |
481 if (!supp.warn.few & length(obs.cl) < 250) { | |
482 warning( paste(fname, " contains only few values (", length(obs.cl), ") after removal of outliers and zeros, which may lead to unreliable results", sep = ""), call. = FALSE ) | |
483 } | |
484 | |
485 # plotting | |
486 l <- plot.KDE(obs.cl, t.name = spec.names[i], bs.cis = use.bstrp, bstrp.reps = bstrp.reps, conf.lev = conf.lev, | |
487 min.dist = min.dist, mode.mass = mode.mass, band.width = band.width) | |
488 tab1.m[i, ] <- l$tab.des | |
489 sum1.m[i, ] <- l$tab.des[c(1, 4, 33, 30, 10+(2*0:8))] | |
490 if (use.bstrp) { | |
491 tab2.m[i, ] <- l$tab.bs | |
492 } | |
493 valleys = l$valleys | |
494 } | |
495 invisible(dev.off()) | |
496 #sessionInfo() | |
497 | |
498 # ... output quantities in tables | |
499 write.table(sum1.m, file = summ.fname, sep = "\t", col.names = NA) | |
500 write.table(tab1.m, file = peak.fname, sep = "\t", col.names=NA) | |
501 write.table(valleys, file = valleys.fname, sep = "\t", col.names=NA) | |
502 if (use.bstrp) { | |
503 write.table(tab2.m, file = bstrp.fname, sep = "\t", col.names=NA) | |
504 } |