comparison analyze.r @ 0:1fcd81d65467 draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/obisindicators commit b377ff767e3051f301c2f02cfe3e1a17b285ede4
author ecology
date Thu, 18 Jan 2024 09:33:52 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1fcd81d65467
1 #' Calculate Biodiversity Indicators, including ES50 (Hurlbert index)
2 #'
3 #' Calculate the expected number of marine species in a random sample of 50
4 #' individuals (records)
5 #'
6 #' @param df data frame with unique species observations containing columns:
7 #' `cell`, `species`, `records`
8 #' @param esn expected number of marine species
9 #'
10 #' @return Data frame with the following extra columns: - `n`: number of records
11 #' - `sp`: species richness - `shannon`: Shannon index - `simpson`: Simpson
12 #' index - `es`: Hurlbert index (n = 50), i.e. expected species from 50
13 #' samples ES(50) - `hill_1`: Hill number `exp(shannon)` - `hill_2`: Hill
14 #' number `1/simpson` - `hill_inf`: Hill number `1/maxp`
15 #'
16 #' @details The expected number of marine species in a random sample of 50
17 #' individuals (records) is an indicator on marine biodiversity richness. The
18 #' ES50 is defined in OBIS as the `sum(esi)` over all species of the following
19 #' per species calculation:
20 #'
21 #' - when `n - ni >= 50 (with n as the total number of records in the cell and
22 #' ni the total number of records for the ith-species)
23 #' - `esi = 1 - exp(lngamma(n-ni+1) + lngamma(n-50+1) - lngamma(n-ni-50+1) - lngamma(n+1))`
24 #'
25 #' - when `n >= 50` - `esi = 1`
26 #'
27 #' - else - `esi = NULL`
28 #'
29 #' Warning: ES50 assumes that individuals are randomly distributed, the sample
30 #' size is sufficiently large, the samples are taxonomically similar, and that
31 #' all of the samples have been taken in the same manner.
32 #'
33 #' @export
34 #' @concept analyze
35 #' @examples
36 #' @importFrom gsl lngamma
37 calc_indicators <- function(df, esn = 50) {
38
39 stopifnot(is.data.frame(df))
40 stopifnot(is.numeric(esn))
41 stopifnot(all(c("cell", "species", "records") %in% names(df)))
42
43 df %>%
44 dplyr::group_by(cell, species) %>%
45 dplyr::summarize(
46 ni = sum(records),
47 .groups = "drop_last") %>%
48 dplyr::mutate(n = sum(ni)) %>%
49 dplyr::group_by(cell, species) %>%
50 dplyr::mutate(
51 hi = -(ni / n * log(ni / n)),
52 si = (ni / n)^2,
53 qi = ni / n,
54 esi = dplyr::case_when(
55 n - ni >= esn ~ 1 - exp(gsl::lngamma(n - ni + 1) + gsl::lngamma(n - esn + 1) - gsl::lngamma(n - ni - esn + 1) - gsl::lngamma(n + 1)),
56 n >= esn ~ 1
57 )
58 ) %>%
59 dplyr::group_by(cell) %>%
60 dplyr::summarize(
61 n = sum(ni),
62 sp = dplyr::n(),
63 shannon = sum(hi),
64 simpson = sum(si),
65 maxp = max(qi),
66 es = sum(esi),
67 .groups = "drop") %>%
68 dplyr::mutate(
69 hill_1 = exp(shannon),
70 hill_2 = 1 / simpson,
71 hill_inf = 1 / maxp)
72 }