Rarefaction curves
## Code by Mahendra Mariadassou, INRA
## Import additional packages
# library(parallel)
## Rarefaction curve, ggplot style (additional phyloseq-extend function, not yet released)
ggrare <- function(physeq, step = 10, label = NULL, color = NULL, plot = TRUE, parallel = FALSE, se = TRUE) {
  ## Draw per-sample rarefaction curves (expected richness vs. subsample size)
  ## as a ggplot object. Adapted from vegan's `rarecurve`.
  ##
  ## Args:
  ## - physeq: phyloseq class object, from which abundance data are extracted.
  ## - step: Step size for sample size in rarefaction curves (single
  ##         positive number).
  ## - label: Default `NULL`. Character string. The name of the sample
  ##          variable to map to text labels on the plot; one label is
  ##          drawn per sample at (total reads, observed richness).
  ##          A vector with one value per sample (in the sample order of
  ##          `physeq`) is also accepted.
  ## - color: Default `NULL`. Character string. The name of the sample
  ##          variable (see `sample_variables(physeq)`) to map to colors.
  ##          As with `aes_string()`, the string is parsed as an R
  ##          expression. A vector with one value per sample (in the
  ##          sample order of `physeq`) is also accepted.
  ##          The color scale is chosen by ggplot2 and can be changed
  ##          afterwards with e.g. `scale_color_manual()`.
  ## - plot: Logical, should the graphic be plotted.
  ## - parallel: Logical, should rarefaction be parallelized with
  ##             `parallel::mclapply()`.
  ## - se: Default TRUE. Logical. Should standard errors be computed
  ##       (drawn as a ribbon around each curve).
  ##
  ## Returns: the ggplot object, invisibly.
  ##
  ## Requires phyloseq, ggplot2 and vegan (for `rarefy`) to be attached.

  stopifnot(is.numeric(step), length(step) == 1L, step > 0)

  ## Capture the caller's expressions now: `color` / `label` are reassigned
  ## further down when custom vectors are supplied.
  color_name <- deparse(substitute(color))[1]
  label_name <- deparse(substitute(label))[1]

  x <- as(otu_table(physeq), "matrix")
  if (taxa_are_rows(physeq)) {
    x <- t(x)
  }
  ## Sample order as supplied; used to align custom per-sample vectors.
  sample_ids <- rownames(x)

  ## This script is adapted from vegan `rarecurve` function
  tot <- rowSums(x)
  ## A sample without any read cannot be rarefied (`seq(1, 0, by = step)`
  ## fails), so such samples are dropped instead of aborting the whole call.
  empty <- tot == 0
  if (any(empty)) {
    warning(
      "Dropping samples with zero reads: ",
      paste(rownames(x)[empty], collapse = ", "),
      call. = FALSE
    )
    x <- x[!empty, , drop = FALSE]
    tot <- tot[!empty]
  }
  S <- rowSums(x > 0)
  nr <- nrow(x)
  if (nr == 0L) {
    stop("No sample with at least one read to rarefy.", call. = FALSE)
  }

  ## Rarefy sample `i` on the grid 1, 1 + step, ..., always ending at the
  ## sample's total read count.
  rarefun <- function(i) {
    n <- seq(1, tot[i], by = step)
    if (n[length(n)] != tot[i]) {
      n <- c(n, tot[i])
    }
    y <- rarefy(x[i, , drop = FALSE], n, se = se)
    if (nrow(y) != 1) {
      ## Two rows: expected richness and its standard error.
      rownames(y) <- c(".S", ".se")
      data.frame(t(y), Size = n, Sample = rownames(x)[i])
    } else {
      data.frame(.S = y[1, ], Size = n, Sample = rownames(x)[i])
    }
  }

  if (parallel) {
    ## Namespace-qualified so the call works without `library(parallel)`.
    out <- parallel::mclapply(seq_len(nr), rarefun, mc.preschedule = FALSE)
  } else {
    out <- lapply(seq_len(nr), rarefun)
  }
  df <- do.call(rbind, out)

  ## Defaults used when `physeq` carries no sample data, so that `data` and
  ## `labels` always exist below.
  data <- df
  labels <- data.frame(x = tot, y = S, Sample = rownames(x))

  ## Get sample data
  if (!is.null(sample_data(physeq, FALSE))) {
    sdf <- as(sample_data(physeq), "data.frame")
    sdf$Sample <- rownames(sdf)
    data <- merge(df, sdf, by = "Sample")
    labels <- merge(labels, sdf, by = "Sample")
  }

  ## Attach a custom vector as column `name` of `target`. A vector with one
  ## value per sample is aligned on sample names, because `target` may hold
  ## several rows per sample and `merge()` re-sorts rows by `Sample`.
  ## Any other length is assigned as is.
  add_sample_var <- function(target, name, values) {
    if (length(values) == length(sample_ids)) {
      values <- values[match(target$Sample, sample_ids)]
    }
    target[[name]] <- values
    target
  }

  ## Add, any custom-supplied plot-mapped variables
  if (length(color) > 1) {
    data <- add_sample_var(data, color_name, color)
    if (length(color) == length(sample_ids)) {
      ## Also needed by the text layer, which maps the same color variable.
      labels <- add_sample_var(labels, color_name, color)
    }
    color <- color_name
  }
  if (length(label) > 1) {
    labels <- add_sample_var(labels, label_name, label)
    label <- label_name
  }

  ## `aes_string()` is deprecated; parse the strings ourselves and inject
  ## them into `aes()` with `!!`, which keeps the same semantics
  ## (a `NULL` leaves the aesthetic unmapped).
  as_aes_expr <- function(value) {
    if (is.character(value)) str2lang(value) else value
  }
  color_expr <- as_aes_expr(color)
  label_expr <- as_aes_expr(label)

  p <- ggplot(data = data, aes(x = Size, y = .S, group = Sample, color = !!color_expr))
  p <- p + labs(x = "Sample Size", y = "ASV Richness")
  if (!is.null(label)) {
    ## `x` and `y` resolve to the columns of `labels` (data columns take
    ## precedence over local variables inside `aes()`).
    p <- p + geom_text(
      data = labels,
      aes(x = x, y = y, label = !!label_expr, color = !!color_expr),
      size = 4, hjust = 0
    )
  }
  p <- p + geom_line()
  if (se) { ## add standard error if available
    p <- p + geom_ribbon(
      aes(ymin = .S - .se, ymax = .S + .se, color = NULL, fill = !!color_expr),
      alpha = 0.2
    )
  }
  if (plot) {
    plot(p)
  }
  invisible(p)
}
## Depth marked by the vertical guide line: the read count of the first sample.
## NOTE(review): presumably all samples share this depth (e.g. after
## rarefaction) -- confirm against the upstream processing.
rare.level <- sample_sums(data)[[1]]
## Kept unchanged in case later chunks still reuse this string.
facet <- paste('facet_wrap(~',params$varExp,')')
## Build the facet formula directly instead of eval(parse(text = ...)).
p <- ggrare(data, step = 500, color = params$varExp, plot = FALSE) +
  geom_vline(xintercept = rare.level, color = "gray60") +
  facet_wrap(stats::as.formula(paste("~", params$varExp)))
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 4
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 6
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 10
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 8
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 6
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 16
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 6
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 4
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 4
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 4
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 7
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 9
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 6
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 5
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 7
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 6
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 4
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 4
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 4
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 4
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 4
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 2
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning in rarefy(x[i, , drop = FALSE], n, se = se): most observed count data
have counts 1, but smallest count is 3
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
## Draw the rarefaction-curve plot `p` assembled above.
plot(p)
