Mercurial > repos > ecology > pampa_plotglm

diff FunctPAMPAGalaxy.r @ 0:3ab852a7ff53 draft
"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 04381ca7162ec3ec68419e308194b91d11cacb04"
author: ecology
date: Mon, 16 Nov 2020 11:02:43 +0000
children: 5ddca052c314
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/FunctPAMPAGalaxy.r	Mon Nov 16 11:02:43 2020 +0000
@@ -0,0 +1,1625 @@
+#Rscript
+
+
+##################################################################################################################################
+####################### PAMPA Galaxy tools functions : Calculate metrics, compute GLM and plot   #################################
+##################################################################################################################################
+
+#### Based on Yves Reecht R script
+#### Modified by Coline ROYAUX for integrating within Galaxy-E
+
+######################################### start of the function fact.def.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
+####### Define the finest aggregation with the observation table
+
+fact_det_f <- function(obs,
+                       size_class = "size.class",
+                       code_species = "species.code",
+                       unitobs = "observation.unit") {
+    if (any(is.element(c(size_class), colnames(obs))) && all(! is.na(obs[, size_class]))) {
+            factors <- c(unitobs, code_species, size_class)
+        }else{
+            factors <- c(unitobs, code_species)
+        }
+    return(factors)
+}
+
+######################################### end of the function fact.def.f
+
+######################################### start of the function def_typeobs_f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
+####### Define observation type from colnames
+
+def_typeobs_f <- function(obs) {
+    if (any(is.element(c("rotation", "rot", "rotate"), colnames(obs)))) {
+        obs_type <- "SVR"
+    }else{
+        obs_type <- "other"
+    }
+    return(obs_type)
+}
+######################################### end of the function fact.def.f
+
+######################################### start of the function create_unitobs called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
+####### Create unitobs column when inexistant
+create_unitobs <- function(data, year = "year", location = "location", unitobs = "observation.unit") {
+    if (is.element(paste(unitobs), colnames(data))) {
+            unitab <- data
+    }else{
+
+        unitab <- tidyr::unite(data, col = "observation.unit", c(year, location))
+    }
+    return(unitab)
+}
+######################################### start of the function create_unitobs
+
+######################################### start of the function create_year_location called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
+####### separate unitobs column when existant
+create_year_location <- function(data, year = "year", location = "location", unitobs = "observation.unit") {
+    if (all(grepl("[1-2][0|8|9][0-9]{2}_.*", data[, unitobs])) == TRUE) {
+        tab <- tidyr::separate(data, col = unitobs, into = c(year, location), sep = "_")
+    }else{
+        if (all(grepl("[A-Z]{2}[0-9]{2}.*", data[, unitobs]) == TRUE)) {
+            tab <- tidyr::separate(data, col = unitobs, into = c("site1", year, "obs"), sep = c(2, 4))
+            tab <- tidyr::unite(tab, col = location, c("site1", "obs"))
+        }else{
+            tab <- data
+        }
+    }
+
+    tab <- cbind(tab, observation.unit = data[, unitobs])
+
+    return(tab)
+}
+######################################### start of the function create_year_location
+
+######################################### start of the function check_file called by every Galaxy Rscripts
+
+check_file <- function(dataset, err_msg, vars, nb_vars) {
+
+    ## Purpose: General function to check integrity of input file. Will
+    ##          check numbers and contents of variables(colnames).
+    ##          return an error message and exit if mismatch detected
+    ## ----------------------------------------------------------------------
+    ## Arguments: dataset : dataset name
+    ##            err_msg : output error
+    ##            vars : expected name of variables
+    ##            nb_vars : expected number of variables
+    ## ----------------------------------------------------------------------
+    ## Author: Alan Amosse, Benjamin Yguel
+
+    if (ncol(dataset) < nb_vars) { #checking for right number of columns in the file if not = error message
+        cat("\nerr nb var\n")
+        stop(err_msg, call. = FALSE)
+    }
+
+    for (i in vars) {
+        if (!(i %in% names(dataset))) { #checking colnames
+            stop(err_msg, call. = FALSE)
+        }
+    }
+}
+
+######################################### end of the function check_file
+
+
+######################################### start of the function stat_rotations_nb_f called by calc_numbers_f
+
+stat_rotations_nb_f <- function(factors, obs) {
+    ## Purpose: Computing abundance statistics by rotation (max, sd)
+    ##          on SVR data
+    ## ----------------------------------------------------------------------
+    ## Arguments: factors : Names of aggregation factors
+    ##            obs : observation data
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 29 oct. 2012, 16:01 modified by Coline ROYAUX 04 june 2020
+
+    ## Identification of valid rotations :
+    if (is.element("observation.unit", factors)) {
+        ## valid rotations (empty must be there as well) :
+        rotations <- tapply(obs$rotation,
+                            as.list(obs[, c("observation.unit", "rotation"), drop = FALSE]),
+                            function(x)length(x) > 0)
+
+        ## Changing NA rotations in FALSE :
+        rotations[is.na(rotations)] <- FALSE
+    }
+
+    ## ###########################################################
+    ## Abundance per rotation at chosen aggregation factors :
+    nombres_rot <- tapply(obs$number,
+                       as.list(obs[, c(factors, "rotation"), drop = FALSE]),
+                       function(x, ...) {
+                       ifelse(all(is.na(x)), NA, sum(x, ...))
+                                        },
+                       na.rm = TRUE)
+
+    ## If valid rotation NA are considered 0 :
+    nombres_rot <- sweep(nombres_rot,
+                      match(names(dimnames(rotations)), names(dimnames(nombres_rot)), nomatch = NULL),
+                      rotations,        # Tableau des secteurs valides (booléens).
+                      function(x, y) {
+                          x[is.na(x) & y] <- 0 # Lorsque NA et secteur valide => 0.
+                          return(x)
+                                     })
+
+    ## ##################################################
+    ## Statistics :
+
+    ## Means :
+    nb_mean <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)),
+                         function(x, ...) {
+                             ifelse(all(is.na(x)), NA, mean(x, ...))
+                                          }, na.rm = TRUE)
+
+    ## Maxima :
+    nb_max <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)),
+                        function(x, ...) {
+                            ifelse(all(is.na(x)), NA, max(x, ...))
+                                         }, na.rm = TRUE)
+
+    ## SD :
+    nb_sd <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)),
+                       function(x, ...) {
+                           ifelse(all(is.na(x)), NA, sd(x, ...))
+                                        }, na.rm = TRUE)
+
+    ## Valid rotations count :
+    nombres_rotations <- apply(rotations, 1, sum, na.rm = TRUE)
+
+    ## Results returned as list :
+    return(list(nb_mean = nb_mean, nb_max = nb_max, nb_sd = nb_sd,
+                nombres_rotations = nombres_rotations, nombresTot = nombres_rot))
+}
+
+######################################### end of the function stat_rotations_nb_f
+
+######################################### start of the function calc_nb_default_f called by calc_numbers_f
+
+calc_nb_default_f <- function(obs,
+                                 factors = c("observation.unit", "species.code", "size.class"),
+                                 nb_name = "number") {
+    ## Purpose : Compute abundances at finest aggregation
+    ## ---------------------------------------------------------------------
+    ## Arguments: obs : observation table
+    ##            factors : aggregation factors
+    ##            nb_name : name of abundance column.
+    ##
+    ## Output: array with ndimensions = nfactors.
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 19 déc. 2011, 13:38 modified by Coline ROYAUX 04 june 2020
+
+    ## Sum individuals number :
+    nbr <- tapply(obs[, nb_name],
+                  as.list(obs[, factors]),
+                  sum, na.rm = TRUE)
+
+    ## Absences as "true zero" :
+    nbr[is.na(nbr)] <- 0
+
+    return(nbr)
+}
+
+######################################### end of the function calc_nb_default_f
+
+######################################### start of the function calc_numbers_f
+
+calc_numbers_f <- function(obs, obs_type = "", factors = c("observation.unit", "species.code", "size.class"), nb_name = "number") {
+    ## Purpose: Produce data.frame used as table from output of calc_nb_default_f().
+    ## ----------------------------------------------------------------------
+    ## Arguments: obs : observation table
+    ##            obs_type : Type of observation (SVR, LIT, ...)
+    ##            factors : aggregation factors
+    ##            nb_name : name of abundance column
+    ##
+    ## Output: data.frame with (N aggregation factors + 1) columns
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 19 déc. 2011, 13:46 modified by Coline ROYAUX 04 june 2020
+
+    if (obs_type == "SVR") {
+         ## Compute SVR abundances statistics :
+         stat_rotations <- stat_rotations_nb_f(factors = factors,
+                                                  obs = obs)
+
+         ## Mean for rotating videos (3 rotations at most times) :
+         nbr <- stat_rotations[["nb_mean"]]
+
+    }else{
+
+         nbr <- calc_nb_default_f(obs, factors, nb_name)
+    }
+
+    res <- as.data.frame(as.table(nbr), responseName = nb_name)
+
+    if (is.element("size.class", colnames(res))) {
+        res$size.class[res$size.class == ""] <- NA
+    }
+
+    ## If integer abundances :
+    if (isTRUE(all.equal(res[, nb_name], as.integer(res[, nb_name])))) {
+        res[, nb_name] <- as.integer(res[, nb_name])
+    }
+
+    if (obs_type == "SVR") {
+        ## statistics on abundances :
+        res[, "number.max"] <- as.vector(stat_rotations[["nb_max"]])
+        res[, "number.sd"] <- as.vector(stat_rotations[["nb_sd"]])
+
+    }
+
+    return(res)
+}
+
+######################################### end of the function calc_numbers_f
+
+######################################### start of the function pres_abs_f called by calc_biodiv_f
+
+pres_abs_f <- function(nombres, logical = FALSE) {
+    ## Purpose: Compute presence absence from abundances
+    ## ----------------------------------------------------------------------
+    ## Arguments: nombres : vector of individuals count.
+    ##            logical : (boolean) results as boolean or 0/1 ?
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 29 oct. 2010, 10:20 modified by Coline ROYAUX 04 june 2020
+
+    if (any(nombres < 0, na.rm = TRUE)) {
+        stop("Negative abundances!")
+    }
+
+    if (logical) {
+        return(nombres > 0)
+    }else{
+        nombres[nombres > 0] <- 1
+        return(nombres)
+    }
+}
+
+######################################### end of the function pres_abs_f
+
+######################################### start of the function bettercbind called by agregations_generic_f
+
+bettercbind <- function(..., df_list = NULL, deparse.level = 1) {
+    ## Purpose: Apply cbind to data frame with mathcing columns but without
+    ##          redundancies.
+    ## ----------------------------------------------------------------------
+    ## Arguments: same as cbind...
+    ##            df_list : data.frames list
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 17 janv. 2012, 21:10 modified by Coline ROYAUX 04 june 2020
+
+    if (is.null(df_list)) {
+        df_list <- list(...)
+    }
+
+    return(do.call(cbind,
+                   c(list(df_list[[1]][, c(tail(colnames(df_list[[1]]), -1),
+                                           head(colnames(df_list[[1]]), 1))]),
+                     lapply(df_list[- 1],
+                            function(x, coldel) {
+                            return(x[, !is.element(colnames(x),
+                                                    coldel),
+                                     drop = FALSE])
+                        },
+                            coldel = colnames(df_list[[1]])),
+                     deparse.level = deparse.level)))
+}
+
+######################################### end of the function bettercbind
+
+######################################### start of the function agregation_f called by agregations_generic_f
+
+agregation_f <- function(metric, d_ata, factors, cas_metric,
+                         nb_name = "number") {
+    ## Purpose: metric aggregation
+    ## ----------------------------------------------------------------------
+    ## Arguments: metric: colnames of chosen metric
+    ##            d_ata: Unaggregated data table
+    ##            factors: aggregation factors vector
+    ##            cas_metric: named vector of observation types depending
+    ##                         on chosen metric
+    ##            nb_name : abundance column name
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 20 déc. 2011, 14:29 modified by Coline ROYAUX 04 june 2020
+
+    switch(cas_metric[metric],
+           "sum" = {
+               res <- tapply(d_ata[, metric],
+                             as.list(d_ata[, factors, drop = FALSE]),
+                             function(x) {
+                             ifelse(all(is.na(x)),
+                                    NA,
+                                    sum(x, na.rm = TRUE))
+                         })
+           },
+           "w.mean" = {
+               res <- tapply(seq_len(nrow(d_ata)),
+                             as.list(d_ata[, factors, drop = FALSE]),
+                             function(ii) {
+                             ifelse(all(is.na(d_ata[ii, metric])),
+                                    NA,
+                                    weighted.mean(d_ata[ii, metric],
+                                                  d_ata[ii, nb_name],
+                                                  na.rm = TRUE))
+                         })
+           },
+           "w.mean.colonies" = {
+               res <- tapply(seq_len(nrow(d_ata)),
+                             as.list(d_ata[, factors, drop = FALSE]),
+                             function(ii) {
+                             ifelse(all(is.na(d_ata[ii, metric])),
+                                    NA,
+                                    weighted.mean(d_ata[ii, metric],
+                                                  d_ata[ii, "colonies"],
+                                                  na.rm = TRUE))
+                         })
+           },
+           "w.mean.prop" = {
+               res <- tapply(seq_len(nrow(d_ata)),
+                             as.list(d_ata[, factors, drop = FALSE]),
+                             function(ii) {
+                             ifelse(all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, "nombre.tot"], na.rm = TRUE) == 0,
+                                    NA,
+                                    ifelse(all(na.omit(d_ata[ii, metric]) == 0),
+                                           0,
+                                           (sum(d_ata[ii, nb_name][!is.na(d_ata[ii, metric])], na.rm = TRUE) /
+                                            sum(d_ata[ii, "nombre.tot"], na.rm = TRUE)) *
+                                           ## Correction if size class isn't an aggregation factor
+                                           ## (otherwise value divided by number of present classes) :
+                                           ifelse(is.element("size.class", factors),
+                                                  100,
+                                                  100 * length(unique(d_ata$size.class)))))
+                         })
+
+           },
+           "w.mean.prop.bio" = {
+               res <- tapply(seq_len(nrow(d_ata)),
+                             as.list(d_ata[, factors, drop = FALSE]),
+                             function(ii) {
+                             ifelse(all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, "tot.biomass"], na.rm = TRUE) == 0,
+                                    NA,
+                                    ifelse(all(na.omit(d_ata[ii, metric]) == 0),
+                                           0,
+                                           (sum(d_ata[ii, "biomass"][!is.na(d_ata[ii, metric])], na.rm = TRUE) /
+                                            sum(d_ata[ii, "tot.biomass"], na.rm = TRUE)) *
+                                           ## Correction if size class isn't an aggregation factor
+                                           ## (otherwise value divided by number of present classes) :
+                                           ifelse(is.element("size.class", factors),
+                                                  100,
+                                                  100 * length(unique(d_ata$size.class)))))
+                         })
+
+           },
+           "pres" = {
+               res <- tapply(d_ata[, metric],
+                             as.list(d_ata[, factors, drop = FALSE]),
+                             function(x) {
+                             ifelse(all(is.na(x)), # When only NAs.
+                                    NA,
+                                    ifelse(any(x > 0, na.rm = TRUE), # Otherwise...
+                                           1, # ... presence if at least one observation in the group.
+                                           0))
+                         })
+           },
+           "nbMax" = {
+
+              ## Sum by factor cross / rotation :
+               nb_tmp2 <- apply(nb_tmp,
+                             which(is.element(names(dimnames(nb_tmp)), c(factors, "rotation"))),
+                             function(x) {
+                             ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE))
+                         })
+
+               ## Sum by factor cross :
+               res <- as.array(apply(nb_tmp2,
+                                     which(is.element(names(dimnames(nb_tmp)), factors)),
+                                     function(x) {
+                                     ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE))
+                                 }))
+           },
+           "nbSD" = {
+
+               ## Sum by factor cross / rotation :
+               nb_tmp2 <- apply(nb_tmp,
+                             which(is.element(names(dimnames(nb_tmp)), c(factors, "rotation"))),
+                             function(x) {
+                             ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE))
+                         })
+
+               ## Sum by factor cross :
+               res <- as.array(apply(nb_tmp2,
+                                     which(is.element(names(dimnames(nb_tmp)), factors)),
+                                     function(x) {
+                                     ifelse(all(is.na(x)), NA, sd(x, na.rm = TRUE))
+                                 }))
+           },
+           "densMax" = {
+
+               ## Sum by factor cross / rotation :
+               dens_tmp2 <- apply(dens_tmp,
+                                 which(is.element(names(dimnames(dens_tmp)), c(factors, "rotation"))),
+                                 function(x) {
+                                 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE))
+                             })
+
+               ## Sum by factor cross :
+               res <- as.array(apply(dens_tmp2,
+                                     which(is.element(names(dimnames(dens_tmp)), factors)),
+                                     function(x) {
+                                     ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE))
+                                 }))
+           },
+           "densSD" = {
+
+               ## Sum by factor cross / rotation :
+               dens_tmp2 <- apply(dens_tmp,
+                                 which(is.element(names(dimnames(dens_tmp)), c(factors, "rotation"))),
+                                 function(x) {
+                                 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE))
+                             })
+
+               ## Sum by factor cross :
+               res <- as.array(apply(dens_tmp2,
+                                     which(is.element(names(dimnames(dens_tmp)), factors)),
+                                     function(x) {
+                                     ifelse(all(is.na(x)), NA, sd(x, na.rm = TRUE))
+                                 }))
+           },
+           "%.nesting" = {
+               res <- tapply(seq_len(nrow(d_ata)),
+                             as.list(d_ata[, factors, drop = FALSE]),
+                             function(ii) {
+                             ifelse(all(is.na(d_ata[ii, metric])),
+                                    NA,
+                                    weighted.mean(d_ata[ii, metric],
+                                                  d_ata[ii, "readable.tracks"],
+                                                  na.rm = TRUE))
+                         })
+           },
+           stop("Not implemented!")
+           )
+
+    ## dimension names
+    names(dimnames(res)) <- c(factors)
+
+    ## Transformation to long format :
+    reslong <- as.data.frame(as.table(res), responseName = metric)
+    reslong <- reslong[, c(tail(colnames(reslong), 1), head(colnames(reslong), -1))] # metric first
+
+    return(reslong)
+}
+
+######################################### end of the function agregation_f
+
+######################################### start of the function agregations_generic_f called y calc_biodiv_f in FucntExeCalcCommIndexesGalaxy.r
+
+agregations_generic_f <- function(d_ata, metrics, factors, list_fact = NULL, unit_sp_sz = NULL, unit_sp = NULL,
+                                  nb_name = "number") {
+    ## Purpose: Aggregate data
+    ## ----------------------------------------------------------------------
+    ## Arguments: d_ata : data set
+    ##            metrics : aggregated metric
+    ##            factors : aggregation factors
+    ##            list_fact : other factors to aggregate and add to output
+    ##            unit_sp_sz : Metrics table by unitobs/species/Size Class
+    ##            unit_sp : Metrics table by unitobs/species
+    ##            nb_name : abundance colname
+    ##
+    ## Output : aggregated data frame
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 18 oct. 2010, 15:47 modified by Coline ROYAUX 04 june 2020
+
+    ## trt depending on metric type :
+    cas_metric <- c("number" = "sum",
+                     "mean.length" = "w.mean",
+                     "taille_moy" = "w.mean",
+                     "biomass" = "sum",
+                     "Biomass" = "sum",
+                     "weight" = "sum",
+                     "mean.weight" = "w.mean",
+                     "density" = "sum",
+                     "Density" = "sum",
+                     "CPUE" = "sum",
+                     "CPUE.biomass" = "sum",
+                     "presence_absence" = "pres",
+                     "abundance.prop.SC" = "w.mean.prop", # Not OK [!!!] ?
+                     "biomass.prop.SC" = "w.mean.prop.bio",  # Not OK [!!!] ?
+                     ## Benthos :
+                     "colonies" = "sum",
+                     "coverage" = "sum",
+                     "mean.size.colonies" = "w.mean.colonies",
+                     ## SVR (expérimental) :
+                     "number.max" = "nbMax",
+                     "number.sd" = "nbSD",
+                     "density.max" = "densMax",
+                     "density.sd" = "densSD",
+                     "biomass.max" = "sum",
+                     "spawning.success" = "%.nesting",
+                     "spawnings" = "sum",
+                     "readable.tracks" = "sum",
+                     "tracks.number" = "sum")
+
+    ## add "readable.tracks" for egg laying percentage :
+    if (any(cas_metric[metrics] == "%.nesting")) {
+        if (is.element("size.class", colnames(d_ata))) {
+            if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini")
+
+            d_ata <- merge(d_ata,
+                          unit_sp_sz[, c("species.code", "observation.unit", "size.class", "readable.tracks")],
+                          by = c("species.code", "observation.unit", "size.class"),
+                          suffixes = c("", ".y"))
+        }else{
+            if (is.null(unit_sp)) stop("unit_sp must be defined")
+
+            d_ata <- merge(d_ata,
+                          unit_sp[, c("species.code", "observation.unit", "readable.tracks")],
+                          by = c("species.code", "observation.unit"),
+                          suffixes = c("", ".y"))
+        }
+    }
+
+    ## Add "number" field for computing ponderate means if absent :
+    if (any(cas_metric[metrics] == "w.mean" | cas_metric[metrics] == "w.mean.prop")) {
+        if (is.element("size.class", colnames(d_ata))) {
+            if (is.null(unit_sp_sz)) stop("unit_sp_sz must be defined")
+
+            d_ata <- merge(d_ata,
+                          unit_sp_sz[, c("species.code", "observation.unit", "size.class", nb_name)],
+                          by = c("species.code", "observation.unit", "size.class"),
+                          suffixes = c("", ".y"))
+
+            ## add tot abundance / species / observation unit :
+            nb_tot <- tapply(unit_sp_sz[, nb_name],
+                            as.list(unit_sp_sz[, c("species.code", "observation.unit")]),
+                            sum, na.rm = TRUE)
+
+            d_ata <- merge(d_ata,
+                          as.data.frame(as.table(nb_tot), responseName = "nombre.tot"))
+        }else{
+            if (is.null(unit_sp)) stop("unit_sp must be defined")
+
+            d_ata <- merge(d_ata,
+                          unit_sp[, c("species.code", "observation.unit", nb_name)], # [!!!] unit_sp_sz ?
+                          by = c("species.code", "observation.unit"),
+                          suffixes = c("", ".y"))
+        }
+    }
+
+    ## Add biomass field of biomass proportion by size class :
+    if (any(cas_metric[metrics] == "w.mean.prop.bio")) {
+        if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini")
+
+        d_ata <- merge(d_ata,
+                      unit_sp_sz[, c("species.code", "observation.unit", "size.class", "biomass")],
+                      by = c("species.code", "observation.unit", "size.class"),
+                      suffixes = c("", ".y"))
+
+        ## add tot biomass / species / observation unit :
+        biom_tot <- tapply(unit_sp_sz$biomass,
+                          as.list(unit_sp_sz[, c("species.code", "observation.unit")]),
+                          function(x) {
+                          ifelse(all(is.na(x)),
+                                 NA,
+                                 sum(x, na.rm = TRUE))
+                      })
+
+        d_ata <- merge(d_ata,
+                      as.data.frame(as.table(biom_tot), responseName = "tot.biomass"))
+    }
+
+    ## add colony field for ponderate means pondérées if absent :
+    if (any(cas_metric[metrics] == "w.mean.colonies" & ! is.element("colonies", colnames(d_ata)))) {
+        d_ata$colonies <- unit_sp[match(apply(d_ata[, c("species.code", "observation.unit")],
+                                           1, paste, collapse = "*"),
+                                     apply(unit_sp[, c("species.code", "observation.unit")],
+                                           1, paste, collapse = "*")), "colonies"]
+    }
+
+
+    ## Aggregation of metric depending on factors :
+    reslong <- bettercbind(df_list = lapply(metrics,   # sapply used to have names
+                                         agregation_f,
+                                         d_ata = d_ata, factors = factors, cas_metric = cas_metric,
+                                         nb_name = nb_name))
+
+    ## Aggregation and add other factors :
+    if (! (is.null(list_fact) || length(list_fact) == 0)) {
+        reslong <- cbind(reslong,
+                         sapply(d_ata[, list_fact, drop = FALSE],
+                                function(fact) {
+                                tapply(fact,
+                                       as.list(d_ata[, factors, drop = FALSE]),
+                                       function(x) {
+                                       if (length(x) > 1 && length(unique(x)) > 1) { # must be one modality
+                                           return(NULL)                  # otherwise it is NULL
+                                       }else{
+                                           unique(as.character(x))
+                                       }
+                                   })
+                            }))
+    }
+
+    ## If some factors aren't at the right class :
+    if (any(tmp <- sapply(reslong[, list_fact, drop = FALSE], class) != sapply(d_ata[, list_fact, drop = FALSE], class))) {
+        for (i in which(tmp)) {
+            switch(sapply(d_ata[, list_fact, drop = FALSE], class)[i],
+                   "integer" = {
+                       reslong[, list_fact[i]] <- as.integer(as.character(reslong[, list_fact[i]]))
+                   },
+                   "numeric" = {
+                       reslong[, list_fact[i]] <- as.numeric(as.character(reslong[, list_fact[i]]))
+                   },
+                   reslong[, list_fact[i]] <- eval(call(paste("as", sapply(d_ata[, list_fact, drop = FALSE], class)[i], sep = "."),
+                                                        reslong[, list_fact[i]]))
+                   )
+        }
+    }
+
+    ## Initial order of factors levels :
+    reslong <- as.data.frame(sapply(colnames(reslong),
+                                    function(x) {
+                                    if (is.factor(reslong[, x])) {
+                                        return(factor(reslong[, x], levels = levels(d_ata[, x])))
+                                    }else{
+                                        return(reslong[, x])
+                                    }
+                                }, simplify = FALSE))
+
+
+    ## Check of other aggregated factors supplémentaires. There must be no NULL elements :
+    if (any(sapply(reslong[, list_fact], function(x) {
+                                             any(is.null(unlist(x)))
+                                                     }))) {
+        warning(paste("One of the suppl. factors is probably a subset",
+                      " of the observations grouping factor(s).", sep = ""))
+        return(NULL)
+    }else{
+        return(reslong)
+    }
+}
+
+######################################### end of the function agregations_generic_f
+
+######################################### start of the function drop_levels_f called y calc_biodiv_f in FucntExeCalcCommIndexesGalaxy.r and glm_community in FunctExeCalcGLMGalaxy.r
+drop_levels_f <- function(df, which = NULL) {
+    ## Purpose: Suppress unused levels of factors
+    ## ----------------------------------------------------------------------
+    ## Arguments: df : a data.frame
+    ##            which : included columns index (all by default)
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 10 août 2010, 13:29 modified by Coline ROYAUX 04 june 2020
+
+    if (class(df) != "data.frame") {
+        stop("'df' must be a data.frame")
+    }else{
+        if (is.null(which)) {
+            x <- as.data.frame(sapply(df, function(x) {
+                                      return(x[, drop = TRUE])
+                                  }, simplify = FALSE),
+                               stringsAsFactors = FALSE)
+        }else{                          # Only some columns used
+            x <- df
+
+            x[, which] <- as.data.frame(sapply(df[, which, drop = FALSE],
+                                                function(x) {
+                                                return(x[, drop = TRUE])
+                                            }, simplify = FALSE),
+                                         stringsAsFactors = FALSE)
+        }
+
+        return(x)
+    }
+}
+######################################### end of the function drop_levels_f
+
+######################################### start of the function subset_all_tables_f called by glm_community in FunctExeCalcGLMGalaxy.r
+
+subset_all_tables_f <- function(metrique, tab_metrics, facteurs, selections,
+                                 tab_unitobs, refesp, tab_metrique = "", nb_name = "number", obs_type = "",
+                                 exclude = NULL, add = c("species.code", "observation.unit")) {
+    ## Purpose: Extract useful data only from chosen metrics and factors
+    ## ----------------------------------------------------------------------
+    ## Arguments: metrique : chosen metric
+    ##            facteurs : all chosen factors
+    ##            selections : corresponding modality selected
+    ##            tab_metrique : metrics table name
+    ##            exclude : factors levels to exclude
+    ##            add : field to add to data table
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date:  6 août 2010, 16:46 modified by Coline ROYAUX 04 june 2020
+
+    ## If no metrics table available :
+    if (is.element(tab_metrique, c("", "TableOccurrences", "TablePresAbs"))) {
+        tab_metrique <- "unit_sp"
+    }
+
+    cas_tables <- c("unit_sp" = "unit_sp",
+                   "TablePresAbs" = "unit_sp",
+                   "unit_sp_sz" = "unit_sp_sz")
+
+    ## Recuperation of metrics table :
+    data_metric <- tab_metrics
+    unitobs <- tab_unitobs
+    refesp <- refesp
+
+    ## If no metrics available or already computed :
+    if (is.element(metrique, c("", "occurrence.frequency"))) {
+        metrique <- "tmp"
+        data_metric$tmp <- 0
+        data_metric$tmp[data_metric[, nb_name] > 0] <- 1
+    }
+
+    if (!is.null(add)) {
+        metriques <- c(metrique, add[is.element(add, colnames(data_metric))])
+    }else{
+        metriques <- metrique
+    }
+
+    ## Subset depending on metrics table
+    switch(cas_tables[tab_metrique],
+           ## Observation table by unitobs and species :
+           unit_sp = {
+                restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), metriques, drop = FALSE],
+                                unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])],
+                                              unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs
+                                        facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE],
+                                refesp[match(data_metric$species.code[!is.na(data_metric[, metrique])],
+                                             refesp$species.code),        # ajout des colonnes sélectionnées d'especes
+                                       facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE])
+            },
+           ## Observation table by unitobs, species and size class :
+           unit_sp_sz = {
+               restmp <- cbind(data_metric[!is.na(data_metric[, metrique]),
+                                            c(metriques, "size.class"), drop = FALSE],
+                               unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])],
+                                             unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs
+                                       facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE],
+                               refesp[match(data_metric$species.code[!is.na(data_metric[, metrique])],
+                                            refesp$species.code),        # ajout des colonnes sélectionnées d'especes
+                                      facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE])
+           },
+           ## Other cases :
+           restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), metriques, drop = FALSE],
+                           unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])],
+                                         unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs.
+                                   facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE])
+           )
+
+    sel_col <- which(!is.na(selections))
+    if (!is.null(exclude)) {
+        sel_col <- sel_col[sel_col != exclude]
+    }
+
+    ## Particular case of size classes :
+    if (is.element("size.class", colnames(restmp))) {
+        if (length(grep("^[[:digit:]]*[-_][[:digit:]]*$", unique(as.character(restmp$size.class)), perl = TRUE)) ==
+            length(unique(as.character(restmp$size.class)))) {
+            restmp[, "size.class"] <-
+                factor(as.character(restmp$size.class),
+                       levels = unique(as.character(restmp$size.class))[
+                               order(as.numeric(sub("^([[:digit:]]*)[-_][[:digit:]]*$",
+                                                    "\\1",
+                                                    unique(as.character(restmp$size.class)),
+                                                    perl = TRUE)),
+                                     na.last = FALSE)])
+        }else{
+            restmp[, "size.class"] <- factor(restmp$size.class)
+        }
+    }
+
+    ## Biomass and density conversion -> /100m² :
+    if (any(is.element(colnames(restmp), c("biomass", "density",
+                                           "biomass.max", "density.max",
+                                           "biomass.sd", "density.sd"))) && obs_type != "fishing") {
+        restmp[, is.element(colnames(restmp),
+                             c("biomass", "density",
+                               "biomass.max", "density.max",
+                               "biomass.sd", "density.sd"))] <- 100 *
+                                   restmp[, is.element(colnames(restmp),
+                                                       c("biomass", "density",
+                                                         "biomass.max", "density.max",
+                                                         "biomass.sd", "density.sd"))]
+    }
+
+    return(restmp)
+}
+
+######################################### end of the function subset_all_tables_f
+
+######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r
+
+organise_fact <- function(list_rand, list_fact) {
+    ## Purpose: organise response factors
+    ## ----------------------------------------------------------------------
+    ## Arguments: list_rand : Analysis random factors list
+    ##            list_fact : Analysis factors list
+    ## ----------------------------------------------------------------------
+    ## Author: Coline ROYAUX 14 november 2020
+
+    if (list_rand[1] != "None") {
+        if (all(is.element(list_fact, list_rand)) || list_fact[1] == "None") {
+            resp_fact <- paste("(1|", paste(list_rand, collapse = ") + (1|"), ")")
+            list_f <- NULL
+            list_fact <- list_rand
+        }else{
+            list_f <- list_fact[!is.element(list_fact, list_rand)]
+            resp_fact <- paste(paste(list_f, collapse = " + "), " + (1|", paste(list_rand, collapse = ") + (1|"), ")")
+            list_fact <- c(list_f, list_rand)
+        }
+    }else{
+        list_f <- list_fact
+        resp_fact <- paste(list_fact, collapse = " + ")
+    }
+    return(list(resp_fact, list_f, list_fact))
+}
+
+######################################### end of the function organise_fact
+
+######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r
+distrib_choice <- function(distrib = distrib, metrique = metrique, data = tmpd_ata) {
+    ## Purpose: choose the right distribution
+    ## ----------------------------------------------------------------------
+    ## Arguments: data : data used for analysis
+    ##            metrique : Chosen metric
+    ##            distrib : distribution law selected by user
+    ## ----------------------------------------------------------------------
+    ## Author: Coline ROYAUX 14 november 2020
+
+    if (distrib == "None") {
+        if (metrique == "presence_absence") {
+            chose_distrib <- "binomial"
+        }else{
+            switch(class(data[, metrique]),
+                  "integer" = {
+                                   chose_distrib <- "poisson"
+                              },
+                  "numeric" = {
+                                   chose_distrib <- "gaussian"
+                              },
+                  stop("Selected metric class doesn't fit, you should select an integer or a numeric variable"))
+        }
+    }else{
+        chose_distrib <- distrib
+    }
+    return(chose_distrib)
+}
+
+######################################### end of the function organise_fact
+
+######################################### start of the function create_res_table called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r
+create_res_table <- function(list_rand, list_fact, row, lev, distrib) {
+    ## Purpose: create results table
+    ## ----------------------------------------------------------------------
+    ## Arguments: list_rand : Analysis random factors list
+    ##            list_fact : Analysis factors list
+    ##            row : rows of results table = species or separation factor
+    ##            lev : Levels of analysis factors list
+    ##            distrib : distribution law
+    ## ----------------------------------------------------------------------
+    ## Author: Coline ROYAUX 04 october 2020
+
+    if (list_rand[1] != "None") { ## if random effects
+        tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, BIC = NA, logLik = NA, deviance = NA, df.resid = NA)
+        colrand <- unlist(lapply(list_rand,
+                           FUN = function(x) {
+                                     lapply(c("Std.Dev", "NbObservation", "NbLevels"),
+                                                  FUN = function(y) {
+                                                            paste(x, y, collapse = ":")
+                                                                    })
+                                             }))
+        tab_sum[, colrand] <- NA
+
+        if (! is.null(lev)) { ## if fixed effects + random effects
+            colcoef <- unlist(lapply(c("(Intercept)", lev),
+                               FUN = function(x) {
+                                         lapply(c("Estimate", "Std.Err", "Zvalue", "Pvalue", "IC_up", "IC_inf", "signif"),
+                                                      FUN = function(y) {
+                                                                paste(x, y, collapse = ":")
+                                                                        })
+                                                 }))
+
+        }else{ ## if no fixed effects
+            colcoef <- NULL
+        }
+
+    }else{ ## if no random effects
+        tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, Resid.deviance = NA, df.resid = NA, Null.deviance = NA, df.null = NA)
+
+        switch(distrib,
+               "gaussian" = {
+                                 colcoef <- unlist(lapply(c("(Intercept)", lev),
+                                                          FUN = function(x) {
+                                                                    lapply(c("Estimate", "Std.Err", "Tvalue", "Pvalue", "IC_up", "IC_inf", "signif"),
+                                                                    FUN = function(y) {
+                                                                              paste(x, y, collapse = ":")
+                                                                                      })
+                                                                            }))
+
+                           },
+               "quasipoisson" = {
+                                     colcoef <- unlist(lapply(c("(Intercept)", lev),
+                                                              FUN = function(x) {
+                                                                        lapply(c("Estimate", "Std.Err", "Tvalue", "Pvalue", "IC_up", "IC_inf", "signif"),
+                                                                    FUN = function(y) {
+                                                                              paste(x, y, collapse = ":")
+                                                                                      })
+                                                                                }))
+
+                               }
+             , {
+                    colcoef <- unlist(lapply(c("(Intercept)", lev),
+                                      FUN = function(x) {
+                                                lapply(c("Estimate", "Std.Err", "Zvalue", "Pvalue", "IC_up", "IC_inf", "signif"),
+                                                       FUN = function(y) {
+                                                                 paste(x, y, collapse = ":")
+                                                                         })
+                                                        }))
+                })
+
+    }
+
+    tab_sum[, colcoef] <- NA
+
+
+    return(tab_sum)
+}
+######################################### end of the function create_res_table
+
+######################################### start of the function sorties_lm_f called by glm_community in FunctExeCalcGLMGalaxy.r
+sorties_lm_f <- function(obj_lm, obj_lmy, tab_sum, #formule,
+                        metrique, fact_ana, cut, col_ana, list_fact, list_rand, lev = NULL, d_ata,
+                        log = FALSE, sufixe = NULL) {
+    ## Purpose: Form GLM and LM results
+    ## ----------------------------------------------------------------------
+    ## Arguments: obj_lm : lm object
+    ##            obj_lmy : lm object with year as continuous
+    ##            tab_sum : output summary table
+    ##            formule : LM formula
+    ##            metrique : Chosen metric
+    ##            fact_ana : separation factor
+    ##            cut : level of separation factor
+    ##            col_ana : colname for separation factor in output summary table
+    ##            list_fact : Analysis factors list
+    ##            list_rand : Analysis random factors list
+    ##            levels : Levels of analysis factors list
+    ##            d_ata : d_ata used for analysis
+    ##            log : put log on metric ? (boolean)
+    ##            sufixe : sufix for file name
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 25 août 2010, 16:19 modified by Coline ROYAUX 04 june 2020
+
+    tab_sum[, "Interest.var"] <- as.character(metrique)
+    sum_lm <- summary(obj_lm)
+    tab_sum[, "distribution"] <- as.character(sum_lm$family[1])
+
+    if (length(grep("^glmmTMB", obj_lm$call)) > 0) { #if random effects
+        tab_sum[tab_sum[, col_ana] == cut, "AIC"] <- sum_lm$AICtab[1]
+        tab_sum[tab_sum[, col_ana] == cut, "BIC"] <- sum_lm$AICtab[2]
+        tab_sum[tab_sum[, col_ana] == cut, "logLik"] <- sum_lm$AICtab[3]
+        tab_sum[tab_sum[, col_ana] == cut, "deviance"] <- sum_lm$AICtab[4]
+        tab_sum[tab_sum[, col_ana] == cut, "df.resid"] <- sum_lm$AICtab[5]
+
+        if (! is.null(lev)) { ## if fixed effects + random effects
+            tab_coef <- as.data.frame(sum_lm$coefficients$cond)
+            tab_coef$signif <- lapply(tab_coef[, "Pr(>|z|)"], FUN = function(x) {
+                                                                                     if (!is.na(x) && x < 0.05) {
+                                                                                         "yes"
+                                                                                     }else{
+                                                                                         "no"
+                                                                                     }
+                                                                                })
+
+            tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Zvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "z value"]
+            tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|z|)"]
+
+            tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Zvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+                 if (length(grep(x, rownames(tab_coef))) > 0) {
+                     tab_coef[grepl(x, rownames(tab_coef)), "z value"]
+                 }else{
+                     NA
+                 }
+            }))
+            tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+                 if (length(grep(x, rownames(tab_coef))) > 0) {
+                      tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|z|)"]
+                 }else{
+                      NA
+                 }
+            }))
+
+            if (any(obj_lmy != "")) {
+                sum_lmy <- summary(obj_lmy)
+                tab_coefy <- as.data.frame(sum_lmy$coefficients$cond)
+                tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|z|)"], FUN = function(x) {
+                                                                                           if (!is.na(x) && x < 0.05) {
+                                                                                               "yes"
+                                                                                           }else{
+                                                                                               "no"
+                                                                                           }
+                                                                                      })
+                tab_sum[tab_sum[, col_ana] == cut, "year Zvalue"] <- ifelse(length(tab_coefy["year", "z value"]) > 0, tab_coefy["year", "z value"], NA)
+                tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|z|)"], NA)
+            }
+
+        }
+
+        switch(as.character(length(sum_lm$varcor$cond)),
+               "1" = {
+                          std_d <- c(sum_lm$varcor$cond[[1]])
+                     },
+               "2" = {
+                          std_d <- c(sum_lm$varcor$cond[[1]], sum_lm$varcor$cond[[2]])
+                     },
+               std_d <- NULL)
+
+        tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "Std.Dev", collapse = "|"), colnames(tab_sum))] <- std_d
+        tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "NbObservation", collapse = "|"), colnames(tab_sum))] <- sum_lm$nobs
+        tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "NbLevels", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(list_rand, FUN = function(x) {
+          nlevels(d_ata[, x])
+                  }))
+
+    }else{ ## if fixed effects only
+
+        tab_sum[tab_sum[, col_ana] == cut, "AIC"] <- sum_lm$aic
+        tab_sum[tab_sum[, col_ana] == cut, "Resid.deviance"] <- sum_lm$deviance
+        tab_sum[tab_sum[, col_ana] == cut, "df.resid"] <- sum_lm$df.residual
+        tab_sum[tab_sum[, col_ana] == cut, "Null.deviance"] <- sum_lm$null.deviance
+        tab_sum[tab_sum[, col_ana] == cut, "df.null"] <- sum_lm$df.null
+        tab_coef <- as.data.frame(sum_lm$coefficients)
+
+        if (any(obj_lmy != "")) {
+            sum_lmy <- summary(obj_lmy)
+            tab_coefy <- as.data.frame(sum_lmy$coefficients)
+        }
+
+        if (sum_lm$family[1] == "gaussian" || sum_lm$family[1] == "quasipoisson") {
+
+            tab_coef$signif <- lapply(tab_coef[, "Pr(>|t|)"], FUN = function(x) {
+                                                                                     if (!is.na(x) && x < 0.05) {
+                                                                                          "yes"
+                                                                                     }else{
+                                                                                          "no"
+                                                                                     }
+                                                                                 })
+            tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Tvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "t value"]
+            tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|t|)"]
+
+            tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Tvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+                 if (length(grep(x, rownames(tab_coef))) > 0) {
+                     tab_coef[grepl(x, rownames(tab_coef)), "t value"]
+                 }else{
+                     NA
+                 }
+            }))
+
+            tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+                 if (length(grep(x, rownames(tab_coef))) > 0) {
+                     tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|t|)"]
+                 }else{
+                      NA
+                 }
+            }))
+
+            if (any(obj_lmy != "")) {
+                tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|t|)"], FUN = function(x) {
+                                                                                           if (!is.na(x) && x < 0.05) {
+                                                                                               "yes"
+                                                                                           }else{
+                                                                                               "no"
+                                                                                           }
+                                                                                      })
+                tab_sum[tab_sum[, col_ana] == cut, "year Tvalue"] <- ifelse(length(tab_coefy["year", "t value"]) > 0, tab_coefy["year", "t value"], NA)
+                tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|t|)"], NA)
+            }
+
+        }else{
+            tab_coef$signif <- lapply(tab_coef[, "Pr(>|z|)"], FUN = function(x) {
+                                                                                     if (!is.na(x) && x < 0.05) {
+                                                                                         "yes"
+                                                                                     }else{
+                                                                                         "no"
+                                                                                     }
+                                                                                })
+
+            tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Zvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "z value"]
+            tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|z|)"]
+
+            tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Zvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+                 if (length(grep(x, rownames(tab_coef))) > 0) {
+                     tab_coef[grepl(x, rownames(tab_coef)), "z value"]
+                 }else{
+                     NA
+                 }
+            }))
+            tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+                 if (length(grep(x, rownames(tab_coef))) > 0) {
+                     tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|z|)"]
+                 }else{
+                     NA
+                 }
+            }))
+
+            if (any(obj_lmy != "")) {
+                tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|z|)"], FUN = function(x) {
+                                                                                           if (!is.na(x) && x < 0.05) {
+                                                                                               "yes"
+                                                                                           }else{
+                                                                                               "no"
+                                                                                           }
+                                                                                      })
+
+                tab_sum[tab_sum[, col_ana] == cut, "year Zvalue"] <- ifelse(length(tab_coefy["year", "z value"]) > 0, tab_coefy["year", "z value"], NA)
+                tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|z|)"], NA)
+            }
+        }
+    }
+
+    if (! is.null(lev)) { ## if fixed effects
+        tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Estimate", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Estimate"]
+        tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Std.Err", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Std. Error"]
+        tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*signif", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "signif"]
+
+        tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Estimate", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+                 if (length(grep(x, rownames(tab_coef))) > 0) {
+                     tab_coef[grepl(x, rownames(tab_coef)), "Estimate"]
+                 }else{
+                     NA
+                 }
+            }))
+        tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Std.Err", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+                 if (length(grep(x, rownames(tab_coef))) > 0) {
+                     tab_coef[grepl(x, rownames(tab_coef)), "Std. Error"]
+                 }else{
+                     NA
+                 }
+            }))
+        tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "signif", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+                 if (length(grep(x, rownames(tab_coef))) > 0) {
+                     tab_coef[grepl(x, rownames(tab_coef)), "signif"]
+                 }else{
+                     NA
+                 }
+            }))
+
+        if (any(obj_lmy != "")) {
+            tab_sum[tab_sum[, col_ana] == cut, "year Estimate"] <- ifelse(length(tab_coefy["year", "Estimate"]) > 0, tab_coefy["year", "Estimate"], NA)
+            tab_sum[tab_sum[, col_ana] == cut, "year Std.Err"] <- ifelse(length(tab_coefy["year", "Std. Error"]) > 0, tab_coefy["year", "Std. Error"], NA)
+            tab_sum[tab_sum[, col_ana] == cut, "year signif"] <- ifelse(length(tab_coefy["year", "signif"]) > 0, tab_coefy["year", "signif"], NA)
+        }
+
+    }
+
+    ic <- tryCatch(as.data.frame(confint(obj_lm)), error = function(e) {
+                                                                       })
+
+    tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "IC_up", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+    if (length(grep(x, rownames(ic))) > 0) {
+        ic[grepl(x, rownames(ic)), "97.5 %"]
+    }else{
+        NA
+    }
+}))
+    tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "IC_inf", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
+     if (length(grep(x, rownames(ic))) > 0) {
+         ic[grepl(x, rownames(ic)), "2.5 %"]
+     }else{
+         NA
+     }
+}))
+
+    return(tab_sum)
+
+}
+
+
+######################################### end of the function sorties_lm_f
+
+
+######################################### start of the function note_glm_f called by glm_species and glm_community
+
+note_glm_f <- function(data, obj_lm, metric, list_fact, details = FALSE) {
+    ## Purpose: Note your GLM analysis
+    ## ----------------------------------------------------------------------
+    ## Arguments: data : d_ataframe used for analysis
+    ##            obj_lm : GLM assessed
+    ##            metric : selected metric
+    ##            list_fact : Analysis factors list
+    ##            details : detailed output ?
+    ## ----------------------------------------------------------------------
+    ## Author: Coline ROYAUX, 26 june 2020
+
+    rate <- 0
+    detres <- list(complete_plan = NA, balanced_plan = NA, NA_proportion_OK = NA, no_residual_dispersion = NA, uniform_residuals = NA, outliers_proportion_OK = NA, no_zero_inflation = NA, observation_factor_ratio_OK = NA, enough_levels_random_effect = NA, rate = NA)
+
+    #### d_ata criterions ####
+
+    ## Plan
+
+    plan <- as.data.frame(table(data[, list_fact]))
+
+    if (nrow(plan[plan$Freq == 0, ]) < nrow(plan) * 0.1) { # +0.5 if less than 10% of possible factor's level combinations aren't represented in the sampling scheme
+        rate <- rate + 0.5
+        detres$complete_plan <- TRUE
+
+        if (summary(as.factor(plan$Freq))[1] > nrow(plan) * 0.9) {  # +0.5 if the frequency of the most represented frequency of possible factor's levels combinations is superior to 90% of the total number of possible factor's levels combinations
+            rate <- rate + 0.5
+            detres$balanced_plan <- TRUE
+        }
+
+    }else{
+        detres$complete_plan <- FALSE
+        detres$balanced_plan <- FALSE
+    }
+
+    if (nrow(data) - nrow(na.omit(data)) < nrow(data) * 0.1) { # +1 if less than 10% of the lines in the dataframe bares a NA
+        rate <- rate + 1
+        detres["NA_proportion_OK"] <- TRUE
+    }else{
+        detres["NA_proportion_OK"] <- FALSE
+    }
+
+    #### Model criterions ####
+
+    if (length(grep("quasi", obj_lm$family)) == 0) { #DHARMa doesn't work with quasi distributions
+
+        residuals <- DHARMa::simulateResiduals(obj_lm)
+
+        capture.output(test_res <- DHARMa::testResiduals(residuals))
+        test_zero <- DHARMa::testZeroInflation(residuals)
+
+        ## dispersion of residuals
+
+        if (test_res$dispersion$p.value > 0.05) { # +1.5 if dispersion tests not significative
+            rate <- rate + 1.5
+            detres$no_residual_dispersion <- TRUE
+        }else{
+            detres$no_residual_dispersion <- FALSE
+        }
+
+        ## uniformity of residuals
+
+        if (test_res$uniformity$p.value > 0.05) { # +1 if uniformity tests not significative
+            rate <- rate + 1
+            detres$uniform_residuals <- TRUE
+        }else{
+            detres$uniform_residuals <- FALSE
+        }
+
+        ## residuals outliers
+
+        if (test_res$outliers$p.value > 0.05) { # +0.5 if outliers tests not significative
+            rate <- rate + 0.5
+            detres["outliers_proportion_OK"] <- TRUE
+        }else{
+            detres["outliers_proportion_OK"] <- FALSE
+        }
+
+        ## Zero inflation test
+
+        if (test_zero$p.value > 0.05) { # +1 if zero inflation tests not significative
+            rate <- rate + 1
+            detres$no_zero_inflation <- TRUE
+        }else{
+            detres$no_zero_inflation <- FALSE
+        }
+
+        ## Factors/observations ratio
+
+        if (length(list_fact) / nrow(na.omit(data)) < 0.1) { # +1 if quantity of factors is less than 10% of the quantity of observations
+            rate <- rate + 1
+            detres["observation_factor_ratio_OK"] <- TRUE
+        }else{
+            detres["observation_factor_ratio_OK"] <- FALSE
+        }
+
+        ## less than 10 factors' level on random effect
+
+        if (length(grep("^glmmTMB", obj_lm$call)) > 0) {
+            nlev_rand <- c()
+            for (fact in names(summary(obj_lm)$varcor$cond)) {
+                 nlev_rand <- c(nlev_rand, length(unlist(unique(data[, fact]))))
+            }
+
+            if (all(nlev_rand > 10)) { # +1 if more than 10 levels in one random effect
+                rate <- rate + 1
+                detres$enough_levels_random_effect <- TRUE
+            }else{
+                detres$enough_levels_random_effect <- FALSE
+            }
+        }
+
+        detres$rate <- rate
+
+        if (details) {
+            return(detres)
+        }else{
+            return(rate)
+        }
+
+    }else{
+        return(NA)
+        cat("Models with quasi distributions can't be rated for now")
+    }
+}
+
+######################################### end of the function note_glm_f
+
+######################################### start of the function note_glms_f called by glm_species and glm_community
+
+note_glms_f <- function(tab_rate, expr_lm, obj_lm, file_out = FALSE) {
+    ## Purpose: Note your GLM analysis
+    ## ----------------------------------------------------------------------
+    ## Arguments: tab_rate : rates table from note_glm_f
+    ##            expr_lm : GLM expression assessed
+    ##            obj_lm : GLM object
+    ##            file_out : Output as file ? else global rate only
+    ## ----------------------------------------------------------------------
+    ## Author: Coline ROYAUX, 26 june 2020
+    namefile <- "RatingGLM.txt"
+
+    if (length(grep("quasi", obj_lm$family)) == 0) { #DHARMa doesn't work with quasi distributions
+
+    rate_m <- median(na.omit(tab_rate[, "rate"]))
+    sum <- summary(obj_lm)
+
+    if (length(grep("^glmmTMB", obj_lm$call)) > 0) {
+        if (median(na.omit(tab_rate[, "rate"])) >= 6) { # if 50% has a rate superior or equal to 6 +1
+            rate_m <- rate_m + 1
+        }
+
+        if (quantile(na.omit(tab_rate[, "rate"]), probs = 0.9) >= 6) { # if 90% has a rate superior or equal to 6 +1
+            rate_m <- rate_m + 1
+        }
+    }else{
+        if (median(na.omit(tab_rate[, "rate"])) >= 5) { # if 50% has a rate superior or equal to 5 +1
+            rate_m <- rate_m + 1
+        }
+
+        if (quantile(na.omit(tab_rate[, "rate"]), probs = 0.9) >= 5) { # if 90% has a rate superior or equal to 5 +1
+            rate_m <- rate_m + 1
+        }
+    }
+
+    if (file_out) {
+
+        cat("###########################################################################",
+            "\n########################### Analysis evaluation ###########################",
+            "\n###########################################################################", file = namefile, fill = 1, append = TRUE)
+
+        ## Informations on model :
+        cat("\n\n######################################### \nFitted model:", file = namefile, fill = 1, append = TRUE)
+        cat("\t", deparse(expr_lm), "\n\n", file = namefile, sep = "", append = TRUE)
+        cat("Family: ", sum$family[[1]],
+            file = namefile, append = TRUE)
+        cat("\n\nNumber of analysis: ", nrow(tab_rate), file = namefile, append = TRUE)
+
+        ## Global rate :
+        cat("\n\n######################################### \nGlobal rate for all analysis:",
+            "\n\n", rate_m, "out of 10", file = namefile, append = TRUE)
+
+        ## details on every GLM :
+
+        cat("\n\n######################################### \nDetails on every analysis:\n\n", file = namefile, append = TRUE)
+        cat("Analysis\tC1\tC2\tC3\tC4\tC5\tC6\tC7\tC8\tC9\tFinal rate", file = namefile, append = TRUE)
+        apply(tab_rate, 1, FUN = function(x) {
+
+                                  if (!is.na(x["complete_plan"]) && x["complete_plan"] == TRUE) {
+                                      cat("\n", x[1], "\tyes", file = namefile, append = TRUE)
+                                  }else{
+                                      cat("\n", x[1], "\tno", file = namefile, append = TRUE)
+                                  }
+
+                                  for (i in c("balanced_plan", "NA_proportion_OK", "no_residual_dispersion", "uniform_residuals", "outliers_proportion_OK", "no_zero_inflation", "observation_factor_ratio_OK", "enough_levels_random_effect")) {
+                                      if (!is.na(x[i]) && x[i] == TRUE) {
+                                          cat("\tyes", file = namefile, append = TRUE)
+                                      }else{
+                                          cat("\tno", file = namefile, append = TRUE)
+                                      }
+                                  }
+
+                                  cat("\t", x["rate"], "/ 8", file = namefile, append = TRUE)
+
+
+                              })
+        cat("\n\nC1: Complete plan?\nC2: Balanced plan?\nC3: Few NA?\nC4: Regular dispersion?\nC5: Uniform residuals?\nC6: Regular outliers proportion?\nC7: No zero-inflation?\nC8: Good observation/factor ratio?\nC9: Enough levels on random effect?", file = namefile, append = TRUE)
+
+        ## Red flags - advice :
+        cat("\n\n######################################### \nRed flags - advice:\n\n", file = namefile, append = TRUE)
+        if (all(na.omit(tab_rate["NA_proportion_OK"]) == FALSE)) {
+            cat("\n", "\t- More than 10% of lines of your dataset contains NAs", file = namefile, append = TRUE)
+        }
+
+        if (length(grep("FALSE", tab_rate["no_residual_dispersion"])) / length(na.omit(tab_rate["no_residual_dispersion"])) > 0.5) {
+            cat("\n", "\t- More than 50% of your analyses are over- or under- dispersed : Try with another distribution family", file = namefile, append = TRUE)
+        }
+
+        if (length(grep("FALSE", tab_rate["uniform_residuals"])) / length(na.omit(tab_rate["uniform_residuals"])) > 0.5) {
+            cat("\n", "\t- More than 50% of your analyses haven't an uniform distribution of residuals : Try with another distribution family", file = namefile, append = TRUE)
+        }
+
+        if (length(grep("FALSE", tab_rate["outliers_proportion_OK"])) / length(na.omit(tab_rate["outliers_proportion_OK"])) > 0.5) {
+            cat("\n", "\t- More than 50% of your analyses have too much outliers : Try with another distribution family or try to select or filter your data", file = namefile, append = TRUE)
+        }
+
+        if (length(grep("FALSE", tab_rate["no_zero_inflation"])) / length(na.omit(tab_rate["no_zero_inflation"])) > 0.5) {
+            cat("\n", "\t- More than 50% of your analyses have zero inflation : Try to select or filter your data", file = namefile, append = TRUE)
+        }
+
+        if (length(grep("FALSE", tab_rate["observation_factor_ratio_OK"])) / length(na.omit(tab_rate["observation_factor_ratio_OK"])) > 0.5) {
+            cat("\n", "\t- More than 50% of your analyses have not enough observations for the amount of factors : Try to use less factors in your analysis or try to use another separation factor", file = namefile, append = TRUE)
+        }
+
+        if (any(tab_rate["enough_levels_random_effect"] == FALSE, na.rm = TRUE) && length(grep("^glmmTMB", obj_lm$call)) > 0) {
+            cat("\n", "\t- Random effect hasn't enough levels to be robust : If it has less than ten levels remove the random effect", file = namefile, append = TRUE)
+        }
+    }else{
+
+        return(rate_m)
+
+    }
+    }else{
+        cat("Models with quasi distributions can't be rated for now", file = namefile, append = TRUE)
+    }
+}
+
+######################################### end of the function note_glm_f
+
+######################################### start of the function info_stats_f called by glm_species and glm_community
+
+info_stats_f <- function(filename, d_ata, agreg_level = c("species", "unitobs"), type = c("graph", "stat"),
+                        metrique, fact_graph, fact_graph_sel, list_fact, list_fact_sel) {
+    ## Purpose: informations and simple statistics
+    ## ----------------------------------------------------------------------
+    ## Arguments: filename : name of file
+    ##            d_ata : input data
+    ##            agreg_level : aggregation level
+    ##            type : type of function calling
+    ##            metrique : selected metric
+    ##            fact_graph : selection factor
+    ##            fact_graph_sel : list of factors levels selected for this factor
+    ##            list_fact : list of grouping factors
+    ##            list_fact_sel : list of factors levels selected for these factors
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 10 sept. 2012, 15:26 modified by Coline ROYAUX 04 june 2020
+
+    ## Open file :
+    f_ile <- file(description = filename,
+                 open = "w", encoding = "latin1")
+
+    ## if error  :
+    on.exit(if (exists("filename") &&
+                tryCatch(isOpen(f_ile),
+                         error = function(e)return(FALSE))) close(f_ile))
+
+    ## Metrics and factors infos :
+    print_selection_info_f(metrique = metrique, #fact_graph = fact_graph, fact_graph_sel = fact_graph_sel,
+                         list_fact = list_fact, #list_fact_sel = list_fact_sel,
+                         f_ile = f_ile,
+                         agreg_level = agreg_level, type = type)
+
+    ## statistics :
+    if (class(d_ata) == "list") {
+        cat("\n###################################################",
+            "\nStatistics per level of splitting factor:\n",
+            sep = "", file = f_ile, append = TRUE)
+
+        invisible(sapply(seq_len(length(d_ata)),
+                         function(i) {
+                         print_stats_f(d_ata = d_ata[[i]], metrique = metrique, list_fact = list_fact, f_ile = f_ile,
+                                      headline = fact_graph_sel[i])
+                     }))
+    }else{
+        print_stats_f(d_ata = d_ata, metrique = metrique, list_fact = list_fact, f_ile = f_ile,
+                     headline = NULL)
+    }
+
+    ## Close file :
+    close(f_ile)
+
+}
+
+######################################### end of the function info_stats_f
+
+
+######################################### start of the function print_selection_info_f called by info_stats_f
+
+print_selection_info_f <- function(metrique, list_fact,
+                                 f_ile,
+                                 agreg_level = c("species", "unitobs"), type = c("graph", "stat")) {
+    ## Purpose: Write data informations
+    ## ----------------------------------------------------------------------
+    ## Arguments: metrique : chosen metric
+    ##            list_fact : factor's list
+    ##            f_ile : Results file name
+    ##            agreg_level : aggregation level
+    ##            type : function type
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 11 sept. 2012, 10:41 modified by Coline ROYAUX 04 june 2020
+
+    cat("\n##################################################\n",
+        "Metrics and factors (and possible units/selections):\n",
+        sep = "", file = f_ile, append = TRUE)
+
+    ## metric info :
+    cat("\n Metrics:", metrique,
+        "\n", file = f_ile, append = TRUE)
+
+    ## Clustering factors :
+    if (is.element(agreg_level, c("spCL_unitobs", "spCL_espece", "spSpecies", "spEspece",
+                                 "spUnitobs", "spUnitobs(CL)"))) {
+                                                                     type <- "spatialGraph"
+                                                                 }
+
+    cat(switch(type,
+               "graph" = "\nGrouping factor(s): \n * ",
+               "stat" = "\nAnalyses factor(s): \n * ",
+               "spatialGraph" = "\nSpatial aggregation factor(s): \n * "),
+        paste(list_fact, collaspe = "\n * "), "\n", file = f_ile, append = TRUE)
+
+}
+
+######################################### end of the function print_selection_info_f
+
+
+######################################### start of the function print_stats_f called by info_stats_f
+
+print_stats_f <- function(d_ata, metrique, list_fact, f_ile, headline = NULL) {
+    ## Purpose: Write general statistics table
+    ## ----------------------------------------------------------------------
+    ## Arguments: d_ata : Analysis data
+    ##            metrique : metric's name
+    ##            list_fact : Factor's list
+    ##            f_ile : Simple statistics file name
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 11 sept. 2012, 10:09 modified by Coline ROYAUX 04 june 2020
+
+    ## Header :
+    if (! is.null(headline)) {
+        cat("\n", rep("#", nchar(headline) + 3), "\n",
+            "## ", headline, "\n",
+            sep = "", file = f_ile, append = TRUE)
+    }
+
+    cat("\n########################\nBase statistics:\n\n", file = f_ile, append = TRUE)
+
+    capture.output(print(summary_fr(d_ata[, metrique])), file = f_ile, append = TRUE)
+
+    if (! is.null(list_fact)) {
+        cat("\n#########################################",
+            "\nStatistics per combination of factor levels:\n\n", file = f_ile, sep = "", append = TRUE)
+
+        ## Compute summary for each existing factor's cross :
+        res <- with(d_ata,
+                    tapply(eval(parse(text = metrique)),
+                           INDEX = do.call(paste,
+                                         c(lapply(list_fact,
+                                                  function(y)eval(parse(text = y))),
+                                           sep = ".")),
+                           FUN = summary_fr))
+
+        ## results in table
+        capture.output(print(do.call(rbind, res)),
+                       file = f_ile, append = TRUE)
+    }
+
+    ## empty line :
+    cat("\n", file = f_ile, append = TRUE)
+}
+
+######################################### end of the function print_stats_f
+
+
+######################################### start of the function summary_fr called by print_stats_f
+summary_fr <- function(object, digits = max(3, getOption("digits") - 3), ...) {
+    ## Purpose: Adding SD and N to summary
+    ## ----------------------------------------------------------------------
+    ## Arguments: object : Object to summarise
+    ## ----------------------------------------------------------------------
+    ## Author: Yves Reecht, Date: 13 sept. 2012, 15:47 modified by Coline ROYAUX 04 june 2020
+
+    if (! is.numeric(object)) stop("Programming error")
+
+    ## Compute summary :
+    res <- c(summary(object = object, digits, ...), "sd" = signif(sd(x = object), digits = digits), "N" = length(object))
+
+    return(res)
+}
+
+######################################### start of the function summary_fr
author	ecology
date	Mon, 16 Nov 2020 11:02:43 +0000
parents
children	5ddca052c314