Mercurial > repos > ecology > pampa_plotglm
comparison FunctPAMPAGalaxy.r @ 0:3ab852a7ff53 draft
"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 04381ca7162ec3ec68419e308194b91d11cacb04"
| author | ecology |
|---|---|
| date | Mon, 16 Nov 2020 11:02:43 +0000 |
| parents | |
| children | 5ddca052c314 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3ab852a7ff53 |
|---|---|
| 1 #Rscript | |
| 2 | |
| 3 | |
| 4 ################################################################################################################################## | |
| 5 ####################### PAMPA Galaxy tools functions : Calculate metrics, compute GLM and plot ################################# | |
| 6 ################################################################################################################################## | |
| 7 | |
| 8 #### Based on Yves Reecht R script | |
| 9 #### Modified by Coline ROYAUX for integrating within Galaxy-E | |
| 10 | |
| 11 ######################################### start of the function fact.def.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
| 12 ####### Define the finest aggregation with the observation table | |
| 13 | |
| 14 fact_det_f <- function(obs, | |
| 15 size_class = "size.class", | |
| 16 code_species = "species.code", | |
| 17 unitobs = "observation.unit") { | |
| 18 if (any(is.element(c(size_class), colnames(obs))) && all(! is.na(obs[, size_class]))) { | |
| 19 factors <- c(unitobs, code_species, size_class) | |
| 20 }else{ | |
| 21 factors <- c(unitobs, code_species) | |
| 22 } | |
| 23 return(factors) | |
| 24 } | |
| 25 | |
| 26 ######################################### end of the function fact.def.f | |
| 27 | |
| 28 ######################################### start of the function def_typeobs_f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
| 29 ####### Define observation type from colnames | |
| 30 | |
| 31 def_typeobs_f <- function(obs) { | |
| 32 if (any(is.element(c("rotation", "rot", "rotate"), colnames(obs)))) { | |
| 33 obs_type <- "SVR" | |
| 34 }else{ | |
| 35 obs_type <- "other" | |
| 36 } | |
| 37 return(obs_type) | |
| 38 } | |
| 39 ######################################### end of the function fact.def.f | |
| 40 | |
| 41 ######################################### start of the function create_unitobs called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
| 42 ####### Create unitobs column when inexistant | |
| 43 create_unitobs <- function(data, year = "year", location = "location", unitobs = "observation.unit") { | |
| 44 if (is.element(paste(unitobs), colnames(data))) { | |
| 45 unitab <- data | |
| 46 }else{ | |
| 47 | |
| 48 unitab <- tidyr::unite(data, col = "observation.unit", c(year, location)) | |
| 49 } | |
| 50 return(unitab) | |
| 51 } | |
| 52 ######################################### start of the function create_unitobs | |
| 53 | |
| 54 ######################################### start of the function create_year_location called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
| 55 ####### separate unitobs column when existant | |
| 56 create_year_location <- function(data, year = "year", location = "location", unitobs = "observation.unit") { | |
| 57 if (all(grepl("[1-2][0|8|9][0-9]{2}_.*", data[, unitobs])) == TRUE) { | |
| 58 tab <- tidyr::separate(data, col = unitobs, into = c(year, location), sep = "_") | |
| 59 }else{ | |
| 60 if (all(grepl("[A-Z]{2}[0-9]{2}.*", data[, unitobs]) == TRUE)) { | |
| 61 tab <- tidyr::separate(data, col = unitobs, into = c("site1", year, "obs"), sep = c(2, 4)) | |
| 62 tab <- tidyr::unite(tab, col = location, c("site1", "obs")) | |
| 63 }else{ | |
| 64 tab <- data | |
| 65 } | |
| 66 } | |
| 67 | |
| 68 tab <- cbind(tab, observation.unit = data[, unitobs]) | |
| 69 | |
| 70 return(tab) | |
| 71 } | |
| 72 ######################################### start of the function create_year_location | |
| 73 | |
| 74 ######################################### start of the function check_file called by every Galaxy Rscripts | |
| 75 | |
| 76 check_file <- function(dataset, err_msg, vars, nb_vars) { | |
| 77 | |
| 78 ## Purpose: General function to check integrity of input file. Will | |
| 79 ## check numbers and contents of variables(colnames). | |
| 80 ## return an error message and exit if mismatch detected | |
| 81 ## ---------------------------------------------------------------------- | |
| 82 ## Arguments: dataset : dataset name | |
| 83 ## err_msg : output error | |
| 84 ## vars : expected name of variables | |
| 85 ## nb_vars : expected number of variables | |
| 86 ## ---------------------------------------------------------------------- | |
| 87 ## Author: Alan Amosse, Benjamin Yguel | |
| 88 | |
| 89 if (ncol(dataset) < nb_vars) { #checking for right number of columns in the file if not = error message | |
| 90 cat("\nerr nb var\n") | |
| 91 stop(err_msg, call. = FALSE) | |
| 92 } | |
| 93 | |
| 94 for (i in vars) { | |
| 95 if (!(i %in% names(dataset))) { #checking colnames | |
| 96 stop(err_msg, call. = FALSE) | |
| 97 } | |
| 98 } | |
| 99 } | |
| 100 | |
| 101 ######################################### end of the function check_file | |
| 102 | |
| 103 | |
| 104 ######################################### start of the function stat_rotations_nb_f called by calc_numbers_f | |
| 105 | |
| 106 stat_rotations_nb_f <- function(factors, obs) { | |
| 107 ## Purpose: Computing abundance statistics by rotation (max, sd) | |
| 108 ## on SVR data | |
| 109 ## ---------------------------------------------------------------------- | |
| 110 ## Arguments: factors : Names of aggregation factors | |
| 111 ## obs : observation data | |
| 112 ## ---------------------------------------------------------------------- | |
| 113 ## Author: Yves Reecht, Date: 29 oct. 2012, 16:01 modified by Coline ROYAUX 04 june 2020 | |
| 114 | |
| 115 ## Identification of valid rotations : | |
| 116 if (is.element("observation.unit", factors)) { | |
| 117 ## valid rotations (empty must be there as well) : | |
| 118 rotations <- tapply(obs$rotation, | |
| 119 as.list(obs[, c("observation.unit", "rotation"), drop = FALSE]), | |
| 120 function(x)length(x) > 0) | |
| 121 | |
| 122 ## Changing NA rotations in FALSE : | |
| 123 rotations[is.na(rotations)] <- FALSE | |
| 124 } | |
| 125 | |
| 126 ## ########################################################### | |
| 127 ## Abundance per rotation at chosen aggregation factors : | |
| 128 nombres_rot <- tapply(obs$number, | |
| 129 as.list(obs[, c(factors, "rotation"), drop = FALSE]), | |
| 130 function(x, ...) { | |
| 131 ifelse(all(is.na(x)), NA, sum(x, ...)) | |
| 132 }, | |
| 133 na.rm = TRUE) | |
| 134 | |
| 135 ## If valid rotation NA are considered 0 : | |
| 136 nombres_rot <- sweep(nombres_rot, | |
| 137 match(names(dimnames(rotations)), names(dimnames(nombres_rot)), nomatch = NULL), | |
| 138 rotations, # Tableau des secteurs valides (booléens). | |
| 139 function(x, y) { | |
| 140 x[is.na(x) & y] <- 0 # Lorsque NA et secteur valide => 0. | |
| 141 return(x) | |
| 142 }) | |
| 143 | |
| 144 ## ################################################## | |
| 145 ## Statistics : | |
| 146 | |
| 147 ## Means : | |
| 148 nb_mean <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)), | |
| 149 function(x, ...) { | |
| 150 ifelse(all(is.na(x)), NA, mean(x, ...)) | |
| 151 }, na.rm = TRUE) | |
| 152 | |
| 153 ## Maxima : | |
| 154 nb_max <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)), | |
| 155 function(x, ...) { | |
| 156 ifelse(all(is.na(x)), NA, max(x, ...)) | |
| 157 }, na.rm = TRUE) | |
| 158 | |
| 159 ## SD : | |
| 160 nb_sd <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)), | |
| 161 function(x, ...) { | |
| 162 ifelse(all(is.na(x)), NA, sd(x, ...)) | |
| 163 }, na.rm = TRUE) | |
| 164 | |
| 165 ## Valid rotations count : | |
| 166 nombres_rotations <- apply(rotations, 1, sum, na.rm = TRUE) | |
| 167 | |
| 168 ## Results returned as list : | |
| 169 return(list(nb_mean = nb_mean, nb_max = nb_max, nb_sd = nb_sd, | |
| 170 nombres_rotations = nombres_rotations, nombresTot = nombres_rot)) | |
| 171 } | |
| 172 | |
| 173 ######################################### end of the function stat_rotations_nb_f | |
| 174 | |
| 175 ######################################### start of the function calc_nb_default_f called by calc_numbers_f | |
| 176 | |
| 177 calc_nb_default_f <- function(obs, | |
| 178 factors = c("observation.unit", "species.code", "size.class"), | |
| 179 nb_name = "number") { | |
| 180 ## Purpose : Compute abundances at finest aggregation | |
| 181 ## --------------------------------------------------------------------- | |
| 182 ## Arguments: obs : observation table | |
| 183 ## factors : aggregation factors | |
| 184 ## nb_name : name of abundance column. | |
| 185 ## | |
| 186 ## Output: array with ndimensions = nfactors. | |
| 187 ## ---------------------------------------------------------------------- | |
| 188 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:38 modified by Coline ROYAUX 04 june 2020 | |
| 189 | |
| 190 ## Sum individuals number : | |
| 191 nbr <- tapply(obs[, nb_name], | |
| 192 as.list(obs[, factors]), | |
| 193 sum, na.rm = TRUE) | |
| 194 | |
| 195 ## Absences as "true zero" : | |
| 196 nbr[is.na(nbr)] <- 0 | |
| 197 | |
| 198 return(nbr) | |
| 199 } | |
| 200 | |
| 201 ######################################### end of the function calc_nb_default_f | |
| 202 | |
| 203 ######################################### start of the function calc_numbers_f | |
| 204 | |
| 205 calc_numbers_f <- function(obs, obs_type = "", factors = c("observation.unit", "species.code", "size.class"), nb_name = "number") { | |
| 206 ## Purpose: Produce data.frame used as table from output of calc_nb_default_f(). | |
| 207 ## ---------------------------------------------------------------------- | |
| 208 ## Arguments: obs : observation table | |
| 209 ## obs_type : Type of observation (SVR, LIT, ...) | |
| 210 ## factors : aggregation factors | |
| 211 ## nb_name : name of abundance column | |
| 212 ## | |
| 213 ## Output: data.frame with (N aggregation factors + 1) columns | |
| 214 ## ---------------------------------------------------------------------- | |
| 215 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:46 modified by Coline ROYAUX 04 june 2020 | |
| 216 | |
| 217 if (obs_type == "SVR") { | |
| 218 ## Compute SVR abundances statistics : | |
| 219 stat_rotations <- stat_rotations_nb_f(factors = factors, | |
| 220 obs = obs) | |
| 221 | |
| 222 ## Mean for rotating videos (3 rotations at most times) : | |
| 223 nbr <- stat_rotations[["nb_mean"]] | |
| 224 | |
| 225 }else{ | |
| 226 | |
| 227 nbr <- calc_nb_default_f(obs, factors, nb_name) | |
| 228 } | |
| 229 | |
| 230 res <- as.data.frame(as.table(nbr), responseName = nb_name) | |
| 231 | |
| 232 if (is.element("size.class", colnames(res))) { | |
| 233 res$size.class[res$size.class == ""] <- NA | |
| 234 } | |
| 235 | |
| 236 ## If integer abundances : | |
| 237 if (isTRUE(all.equal(res[, nb_name], as.integer(res[, nb_name])))) { | |
| 238 res[, nb_name] <- as.integer(res[, nb_name]) | |
| 239 } | |
| 240 | |
| 241 if (obs_type == "SVR") { | |
| 242 ## statistics on abundances : | |
| 243 res[, "number.max"] <- as.vector(stat_rotations[["nb_max"]]) | |
| 244 res[, "number.sd"] <- as.vector(stat_rotations[["nb_sd"]]) | |
| 245 | |
| 246 } | |
| 247 | |
| 248 return(res) | |
| 249 } | |
| 250 | |
| 251 ######################################### end of the function calc_numbers_f | |
| 252 | |
| 253 ######################################### start of the function pres_abs_f called by calc_biodiv_f | |
| 254 | |
| 255 pres_abs_f <- function(nombres, logical = FALSE) { | |
| 256 ## Purpose: Compute presence absence from abundances | |
| 257 ## ---------------------------------------------------------------------- | |
| 258 ## Arguments: nombres : vector of individuals count. | |
| 259 ## logical : (boolean) results as boolean or 0/1 ? | |
| 260 ## ---------------------------------------------------------------------- | |
| 261 ## Author: Yves Reecht, Date: 29 oct. 2010, 10:20 modified by Coline ROYAUX 04 june 2020 | |
| 262 | |
| 263 if (any(nombres < 0, na.rm = TRUE)) { | |
| 264 stop("Negative abundances!") | |
| 265 } | |
| 266 | |
| 267 if (logical) { | |
| 268 return(nombres > 0) | |
| 269 }else{ | |
| 270 nombres[nombres > 0] <- 1 | |
| 271 return(nombres) | |
| 272 } | |
| 273 } | |
| 274 | |
| 275 ######################################### end of the function pres_abs_f | |
| 276 | |
| 277 ######################################### start of the function bettercbind called by agregations_generic_f | |
| 278 | |
| 279 bettercbind <- function(..., df_list = NULL, deparse.level = 1) { | |
| 280 ## Purpose: Apply cbind to data frame with mathcing columns but without | |
| 281 ## redundancies. | |
| 282 ## ---------------------------------------------------------------------- | |
| 283 ## Arguments: same as cbind... | |
| 284 ## df_list : data.frames list | |
| 285 ## ---------------------------------------------------------------------- | |
| 286 ## Author: Yves Reecht, Date: 17 janv. 2012, 21:10 modified by Coline ROYAUX 04 june 2020 | |
| 287 | |
| 288 if (is.null(df_list)) { | |
| 289 df_list <- list(...) | |
| 290 } | |
| 291 | |
| 292 return(do.call(cbind, | |
| 293 c(list(df_list[[1]][, c(tail(colnames(df_list[[1]]), -1), | |
| 294 head(colnames(df_list[[1]]), 1))]), | |
| 295 lapply(df_list[- 1], | |
| 296 function(x, coldel) { | |
| 297 return(x[, !is.element(colnames(x), | |
| 298 coldel), | |
| 299 drop = FALSE]) | |
| 300 }, | |
| 301 coldel = colnames(df_list[[1]])), | |
| 302 deparse.level = deparse.level))) | |
| 303 } | |
| 304 | |
| 305 ######################################### end of the function bettercbind | |
| 306 | |
| 307 ######################################### start of the function agregation_f called by agregations_generic_f | |
| 308 | |
| 309 agregation_f <- function(metric, d_ata, factors, cas_metric, | |
| 310 nb_name = "number") { | |
| 311 ## Purpose: metric aggregation | |
| 312 ## ---------------------------------------------------------------------- | |
| 313 ## Arguments: metric: colnames of chosen metric | |
| 314 ## d_ata: Unaggregated data table | |
| 315 ## factors: aggregation factors vector | |
| 316 ## cas_metric: named vector of observation types depending | |
| 317 ## on chosen metric | |
| 318 ## nb_name : abundance column name | |
| 319 ## ---------------------------------------------------------------------- | |
| 320 ## Author: Yves Reecht, Date: 20 déc. 2011, 14:29 modified by Coline ROYAUX 04 june 2020 | |
| 321 | |
| 322 switch(cas_metric[metric], | |
| 323 "sum" = { | |
| 324 res <- tapply(d_ata[, metric], | |
| 325 as.list(d_ata[, factors, drop = FALSE]), | |
| 326 function(x) { | |
| 327 ifelse(all(is.na(x)), | |
| 328 NA, | |
| 329 sum(x, na.rm = TRUE)) | |
| 330 }) | |
| 331 }, | |
| 332 "w.mean" = { | |
| 333 res <- tapply(seq_len(nrow(d_ata)), | |
| 334 as.list(d_ata[, factors, drop = FALSE]), | |
| 335 function(ii) { | |
| 336 ifelse(all(is.na(d_ata[ii, metric])), | |
| 337 NA, | |
| 338 weighted.mean(d_ata[ii, metric], | |
| 339 d_ata[ii, nb_name], | |
| 340 na.rm = TRUE)) | |
| 341 }) | |
| 342 }, | |
| 343 "w.mean.colonies" = { | |
| 344 res <- tapply(seq_len(nrow(d_ata)), | |
| 345 as.list(d_ata[, factors, drop = FALSE]), | |
| 346 function(ii) { | |
| 347 ifelse(all(is.na(d_ata[ii, metric])), | |
| 348 NA, | |
| 349 weighted.mean(d_ata[ii, metric], | |
| 350 d_ata[ii, "colonies"], | |
| 351 na.rm = TRUE)) | |
| 352 }) | |
| 353 }, | |
| 354 "w.mean.prop" = { | |
| 355 res <- tapply(seq_len(nrow(d_ata)), | |
| 356 as.list(d_ata[, factors, drop = FALSE]), | |
| 357 function(ii) { | |
| 358 ifelse(all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, "nombre.tot"], na.rm = TRUE) == 0, | |
| 359 NA, | |
| 360 ifelse(all(na.omit(d_ata[ii, metric]) == 0), | |
| 361 0, | |
| 362 (sum(d_ata[ii, nb_name][!is.na(d_ata[ii, metric])], na.rm = TRUE) / | |
| 363 sum(d_ata[ii, "nombre.tot"], na.rm = TRUE)) * | |
| 364 ## Correction if size class isn't an aggregation factor | |
| 365 ## (otherwise value divided by number of present classes) : | |
| 366 ifelse(is.element("size.class", factors), | |
| 367 100, | |
| 368 100 * length(unique(d_ata$size.class))))) | |
| 369 }) | |
| 370 | |
| 371 }, | |
| 372 "w.mean.prop.bio" = { | |
| 373 res <- tapply(seq_len(nrow(d_ata)), | |
| 374 as.list(d_ata[, factors, drop = FALSE]), | |
| 375 function(ii) { | |
| 376 ifelse(all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, "tot.biomass"], na.rm = TRUE) == 0, | |
| 377 NA, | |
| 378 ifelse(all(na.omit(d_ata[ii, metric]) == 0), | |
| 379 0, | |
| 380 (sum(d_ata[ii, "biomass"][!is.na(d_ata[ii, metric])], na.rm = TRUE) / | |
| 381 sum(d_ata[ii, "tot.biomass"], na.rm = TRUE)) * | |
| 382 ## Correction if size class isn't an aggregation factor | |
| 383 ## (otherwise value divided by number of present classes) : | |
| 384 ifelse(is.element("size.class", factors), | |
| 385 100, | |
| 386 100 * length(unique(d_ata$size.class))))) | |
| 387 }) | |
| 388 | |
| 389 }, | |
| 390 "pres" = { | |
| 391 res <- tapply(d_ata[, metric], | |
| 392 as.list(d_ata[, factors, drop = FALSE]), | |
| 393 function(x) { | |
| 394 ifelse(all(is.na(x)), # When only NAs. | |
| 395 NA, | |
| 396 ifelse(any(x > 0, na.rm = TRUE), # Otherwise... | |
| 397 1, # ... presence if at least one observation in the group. | |
| 398 0)) | |
| 399 }) | |
| 400 }, | |
| 401 "nbMax" = { | |
| 402 | |
| 403 ## Sum by factor cross / rotation : | |
| 404 nb_tmp2 <- apply(nb_tmp, | |
| 405 which(is.element(names(dimnames(nb_tmp)), c(factors, "rotation"))), | |
| 406 function(x) { | |
| 407 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) | |
| 408 }) | |
| 409 | |
| 410 ## Sum by factor cross : | |
| 411 res <- as.array(apply(nb_tmp2, | |
| 412 which(is.element(names(dimnames(nb_tmp)), factors)), | |
| 413 function(x) { | |
| 414 ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE)) | |
| 415 })) | |
| 416 }, | |
| 417 "nbSD" = { | |
| 418 | |
| 419 ## Sum by factor cross / rotation : | |
| 420 nb_tmp2 <- apply(nb_tmp, | |
| 421 which(is.element(names(dimnames(nb_tmp)), c(factors, "rotation"))), | |
| 422 function(x) { | |
| 423 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) | |
| 424 }) | |
| 425 | |
| 426 ## Sum by factor cross : | |
| 427 res <- as.array(apply(nb_tmp2, | |
| 428 which(is.element(names(dimnames(nb_tmp)), factors)), | |
| 429 function(x) { | |
| 430 ifelse(all(is.na(x)), NA, sd(x, na.rm = TRUE)) | |
| 431 })) | |
| 432 }, | |
| 433 "densMax" = { | |
| 434 | |
| 435 ## Sum by factor cross / rotation : | |
| 436 dens_tmp2 <- apply(dens_tmp, | |
| 437 which(is.element(names(dimnames(dens_tmp)), c(factors, "rotation"))), | |
| 438 function(x) { | |
| 439 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) | |
| 440 }) | |
| 441 | |
| 442 ## Sum by factor cross : | |
| 443 res <- as.array(apply(dens_tmp2, | |
| 444 which(is.element(names(dimnames(dens_tmp)), factors)), | |
| 445 function(x) { | |
| 446 ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE)) | |
| 447 })) | |
| 448 }, | |
| 449 "densSD" = { | |
| 450 | |
| 451 ## Sum by factor cross / rotation : | |
| 452 dens_tmp2 <- apply(dens_tmp, | |
| 453 which(is.element(names(dimnames(dens_tmp)), c(factors, "rotation"))), | |
| 454 function(x) { | |
| 455 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) | |
| 456 }) | |
| 457 | |
| 458 ## Sum by factor cross : | |
| 459 res <- as.array(apply(dens_tmp2, | |
| 460 which(is.element(names(dimnames(dens_tmp)), factors)), | |
| 461 function(x) { | |
| 462 ifelse(all(is.na(x)), NA, sd(x, na.rm = TRUE)) | |
| 463 })) | |
| 464 }, | |
| 465 "%.nesting" = { | |
| 466 res <- tapply(seq_len(nrow(d_ata)), | |
| 467 as.list(d_ata[, factors, drop = FALSE]), | |
| 468 function(ii) { | |
| 469 ifelse(all(is.na(d_ata[ii, metric])), | |
| 470 NA, | |
| 471 weighted.mean(d_ata[ii, metric], | |
| 472 d_ata[ii, "readable.tracks"], | |
| 473 na.rm = TRUE)) | |
| 474 }) | |
| 475 }, | |
| 476 stop("Not implemented!") | |
| 477 ) | |
| 478 | |
| 479 ## dimension names | |
| 480 names(dimnames(res)) <- c(factors) | |
| 481 | |
| 482 ## Transformation to long format : | |
| 483 reslong <- as.data.frame(as.table(res), responseName = metric) | |
| 484 reslong <- reslong[, c(tail(colnames(reslong), 1), head(colnames(reslong), -1))] # metric first | |
| 485 | |
| 486 return(reslong) | |
| 487 } | |
| 488 | |
| 489 ######################################### end of the function agregation_f | |
| 490 | |
| 491 ######################################### start of the function agregations_generic_f called y calc_biodiv_f in FucntExeCalcCommIndexesGalaxy.r | |
| 492 | |
| 493 agregations_generic_f <- function(d_ata, metrics, factors, list_fact = NULL, unit_sp_sz = NULL, unit_sp = NULL, | |
| 494 nb_name = "number") { | |
| 495 ## Purpose: Aggregate data | |
| 496 ## ---------------------------------------------------------------------- | |
| 497 ## Arguments: d_ata : data set | |
| 498 ## metrics : aggregated metric | |
| 499 ## factors : aggregation factors | |
| 500 ## list_fact : other factors to aggregate and add to output | |
| 501 ## unit_sp_sz : Metrics table by unitobs/species/Size Class | |
| 502 ## unit_sp : Metrics table by unitobs/species | |
| 503 ## nb_name : abundance colname | |
| 504 ## | |
| 505 ## Output : aggregated data frame | |
| 506 ## ---------------------------------------------------------------------- | |
| 507 ## Author: Yves Reecht, Date: 18 oct. 2010, 15:47 modified by Coline ROYAUX 04 june 2020 | |
| 508 | |
| 509 ## trt depending on metric type : | |
| 510 cas_metric <- c("number" = "sum", | |
| 511 "mean.length" = "w.mean", | |
| 512 "taille_moy" = "w.mean", | |
| 513 "biomass" = "sum", | |
| 514 "Biomass" = "sum", | |
| 515 "weight" = "sum", | |
| 516 "mean.weight" = "w.mean", | |
| 517 "density" = "sum", | |
| 518 "Density" = "sum", | |
| 519 "CPUE" = "sum", | |
| 520 "CPUE.biomass" = "sum", | |
| 521 "presence_absence" = "pres", | |
| 522 "abundance.prop.SC" = "w.mean.prop", # Not OK [!!!] ? | |
| 523 "biomass.prop.SC" = "w.mean.prop.bio", # Not OK [!!!] ? | |
| 524 ## Benthos : | |
| 525 "colonies" = "sum", | |
| 526 "coverage" = "sum", | |
| 527 "mean.size.colonies" = "w.mean.colonies", | |
| 528 ## SVR (expérimental) : | |
| 529 "number.max" = "nbMax", | |
| 530 "number.sd" = "nbSD", | |
| 531 "density.max" = "densMax", | |
| 532 "density.sd" = "densSD", | |
| 533 "biomass.max" = "sum", | |
| 534 "spawning.success" = "%.nesting", | |
| 535 "spawnings" = "sum", | |
| 536 "readable.tracks" = "sum", | |
| 537 "tracks.number" = "sum") | |
| 538 | |
| 539 ## add "readable.tracks" for egg laying percentage : | |
| 540 if (any(cas_metric[metrics] == "%.nesting")) { | |
| 541 if (is.element("size.class", colnames(d_ata))) { | |
| 542 if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini") | |
| 543 | |
| 544 d_ata <- merge(d_ata, | |
| 545 unit_sp_sz[, c("species.code", "observation.unit", "size.class", "readable.tracks")], | |
| 546 by = c("species.code", "observation.unit", "size.class"), | |
| 547 suffixes = c("", ".y")) | |
| 548 }else{ | |
| 549 if (is.null(unit_sp)) stop("unit_sp must be defined") | |
| 550 | |
| 551 d_ata <- merge(d_ata, | |
| 552 unit_sp[, c("species.code", "observation.unit", "readable.tracks")], | |
| 553 by = c("species.code", "observation.unit"), | |
| 554 suffixes = c("", ".y")) | |
| 555 } | |
| 556 } | |
| 557 | |
| 558 ## Add "number" field for computing ponderate means if absent : | |
| 559 if (any(cas_metric[metrics] == "w.mean" | cas_metric[metrics] == "w.mean.prop")) { | |
| 560 if (is.element("size.class", colnames(d_ata))) { | |
| 561 if (is.null(unit_sp_sz)) stop("unit_sp_sz must be defined") | |
| 562 | |
| 563 d_ata <- merge(d_ata, | |
| 564 unit_sp_sz[, c("species.code", "observation.unit", "size.class", nb_name)], | |
| 565 by = c("species.code", "observation.unit", "size.class"), | |
| 566 suffixes = c("", ".y")) | |
| 567 | |
| 568 ## add tot abundance / species / observation unit : | |
| 569 nb_tot <- tapply(unit_sp_sz[, nb_name], | |
| 570 as.list(unit_sp_sz[, c("species.code", "observation.unit")]), | |
| 571 sum, na.rm = TRUE) | |
| 572 | |
| 573 d_ata <- merge(d_ata, | |
| 574 as.data.frame(as.table(nb_tot), responseName = "nombre.tot")) | |
| 575 }else{ | |
| 576 if (is.null(unit_sp)) stop("unit_sp must be defined") | |
| 577 | |
| 578 d_ata <- merge(d_ata, | |
| 579 unit_sp[, c("species.code", "observation.unit", nb_name)], # [!!!] unit_sp_sz ? | |
| 580 by = c("species.code", "observation.unit"), | |
| 581 suffixes = c("", ".y")) | |
| 582 } | |
| 583 } | |
| 584 | |
| 585 ## Add biomass field of biomass proportion by size class : | |
| 586 if (any(cas_metric[metrics] == "w.mean.prop.bio")) { | |
| 587 if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini") | |
| 588 | |
| 589 d_ata <- merge(d_ata, | |
| 590 unit_sp_sz[, c("species.code", "observation.unit", "size.class", "biomass")], | |
| 591 by = c("species.code", "observation.unit", "size.class"), | |
| 592 suffixes = c("", ".y")) | |
| 593 | |
| 594 ## add tot biomass / species / observation unit : | |
| 595 biom_tot <- tapply(unit_sp_sz$biomass, | |
| 596 as.list(unit_sp_sz[, c("species.code", "observation.unit")]), | |
| 597 function(x) { | |
| 598 ifelse(all(is.na(x)), | |
| 599 NA, | |
| 600 sum(x, na.rm = TRUE)) | |
| 601 }) | |
| 602 | |
| 603 d_ata <- merge(d_ata, | |
| 604 as.data.frame(as.table(biom_tot), responseName = "tot.biomass")) | |
| 605 } | |
| 606 | |
| 607 ## add colony field for ponderate means pondérées if absent : | |
| 608 if (any(cas_metric[metrics] == "w.mean.colonies" & ! is.element("colonies", colnames(d_ata)))) { | |
| 609 d_ata$colonies <- unit_sp[match(apply(d_ata[, c("species.code", "observation.unit")], | |
| 610 1, paste, collapse = "*"), | |
| 611 apply(unit_sp[, c("species.code", "observation.unit")], | |
| 612 1, paste, collapse = "*")), "colonies"] | |
| 613 } | |
| 614 | |
| 615 | |
| 616 ## Aggregation of metric depending on factors : | |
| 617 reslong <- bettercbind(df_list = lapply(metrics, # sapply used to have names | |
| 618 agregation_f, | |
| 619 d_ata = d_ata, factors = factors, cas_metric = cas_metric, | |
| 620 nb_name = nb_name)) | |
| 621 | |
| 622 ## Aggregation and add other factors : | |
| 623 if (! (is.null(list_fact) || length(list_fact) == 0)) { | |
| 624 reslong <- cbind(reslong, | |
| 625 sapply(d_ata[, list_fact, drop = FALSE], | |
| 626 function(fact) { | |
| 627 tapply(fact, | |
| 628 as.list(d_ata[, factors, drop = FALSE]), | |
| 629 function(x) { | |
| 630 if (length(x) > 1 && length(unique(x)) > 1) { # must be one modality | |
| 631 return(NULL) # otherwise it is NULL | |
| 632 }else{ | |
| 633 unique(as.character(x)) | |
| 634 } | |
| 635 }) | |
| 636 })) | |
| 637 } | |
| 638 | |
| 639 ## If some factors aren't at the right class : | |
| 640 if (any(tmp <- sapply(reslong[, list_fact, drop = FALSE], class) != sapply(d_ata[, list_fact, drop = FALSE], class))) { | |
| 641 for (i in which(tmp)) { | |
| 642 switch(sapply(d_ata[, list_fact, drop = FALSE], class)[i], | |
| 643 "integer" = { | |
| 644 reslong[, list_fact[i]] <- as.integer(as.character(reslong[, list_fact[i]])) | |
| 645 }, | |
| 646 "numeric" = { | |
| 647 reslong[, list_fact[i]] <- as.numeric(as.character(reslong[, list_fact[i]])) | |
| 648 }, | |
| 649 reslong[, list_fact[i]] <- eval(call(paste("as", sapply(d_ata[, list_fact, drop = FALSE], class)[i], sep = "."), | |
| 650 reslong[, list_fact[i]])) | |
| 651 ) | |
| 652 } | |
| 653 } | |
| 654 | |
| 655 ## Initial order of factors levels : | |
| 656 reslong <- as.data.frame(sapply(colnames(reslong), | |
| 657 function(x) { | |
| 658 if (is.factor(reslong[, x])) { | |
| 659 return(factor(reslong[, x], levels = levels(d_ata[, x]))) | |
| 660 }else{ | |
| 661 return(reslong[, x]) | |
| 662 } | |
| 663 }, simplify = FALSE)) | |
| 664 | |
| 665 | |
| 666 ## Check of other aggregated factors supplémentaires. There must be no NULL elements : | |
| 667 if (any(sapply(reslong[, list_fact], function(x) { | |
| 668 any(is.null(unlist(x))) | |
| 669 }))) { | |
| 670 warning(paste("One of the suppl. factors is probably a subset", | |
| 671 " of the observations grouping factor(s).", sep = "")) | |
| 672 return(NULL) | |
| 673 }else{ | |
| 674 return(reslong) | |
| 675 } | |
| 676 } | |
| 677 | |
| 678 ######################################### end of the function agregations_generic_f | |
| 679 | |
| 680 ######################################### start of the function drop_levels_f called y calc_biodiv_f in FucntExeCalcCommIndexesGalaxy.r and glm_community in FunctExeCalcGLMGalaxy.r | |
| 681 drop_levels_f <- function(df, which = NULL) { | |
| 682 ## Purpose: Suppress unused levels of factors | |
| 683 ## ---------------------------------------------------------------------- | |
| 684 ## Arguments: df : a data.frame | |
| 685 ## which : included columns index (all by default) | |
| 686 ## ---------------------------------------------------------------------- | |
| 687 ## Author: Yves Reecht, Date: 10 août 2010, 13:29 modified by Coline ROYAUX 04 june 2020 | |
| 688 | |
| 689 if (class(df) != "data.frame") { | |
| 690 stop("'df' must be a data.frame") | |
| 691 }else{ | |
| 692 if (is.null(which)) { | |
| 693 x <- as.data.frame(sapply(df, function(x) { | |
| 694 return(x[, drop = TRUE]) | |
| 695 }, simplify = FALSE), | |
| 696 stringsAsFactors = FALSE) | |
| 697 }else{ # Only some columns used | |
| 698 x <- df | |
| 699 | |
| 700 x[, which] <- as.data.frame(sapply(df[, which, drop = FALSE], | |
| 701 function(x) { | |
| 702 return(x[, drop = TRUE]) | |
| 703 }, simplify = FALSE), | |
| 704 stringsAsFactors = FALSE) | |
| 705 } | |
| 706 | |
| 707 return(x) | |
| 708 } | |
| 709 } | |
| 710 ######################################### end of the function drop_levels_f | |
| 711 | |
| 712 ######################################### start of the function subset_all_tables_f called by glm_community in FunctExeCalcGLMGalaxy.r | |
| 713 | |
| 714 subset_all_tables_f <- function(metrique, tab_metrics, facteurs, selections, | |
| 715 tab_unitobs, refesp, tab_metrique = "", nb_name = "number", obs_type = "", | |
| 716 exclude = NULL, add = c("species.code", "observation.unit")) { | |
| 717 ## Purpose: Extract useful data only from chosen metrics and factors | |
| 718 ## ---------------------------------------------------------------------- | |
| 719 ## Arguments: metrique : chosen metric | |
| 720 ## facteurs : all chosen factors | |
| 721 ## selections : corresponding modality selected | |
| 722 ## tab_metrique : metrics table name | |
| 723 ## exclude : factors levels to exclude | |
| 724 ## add : field to add to data table | |
| 725 ## ---------------------------------------------------------------------- | |
| 726 ## Author: Yves Reecht, Date: 6 août 2010, 16:46 modified by Coline ROYAUX 04 june 2020 | |
| 727 | |
| 728 ## If no metrics table available : | |
| 729 if (is.element(tab_metrique, c("", "TableOccurrences", "TablePresAbs"))) { | |
| 730 tab_metrique <- "unit_sp" | |
| 731 } | |
| 732 | |
| 733 cas_tables <- c("unit_sp" = "unit_sp", | |
| 734 "TablePresAbs" = "unit_sp", | |
| 735 "unit_sp_sz" = "unit_sp_sz") | |
| 736 | |
| 737 ## Recuperation of metrics table : | |
| 738 data_metric <- tab_metrics | |
| 739 unitobs <- tab_unitobs | |
| 740 refesp <- refesp | |
| 741 | |
| 742 ## If no metrics available or already computed : | |
| 743 if (is.element(metrique, c("", "occurrence.frequency"))) { | |
| 744 metrique <- "tmp" | |
| 745 data_metric$tmp <- 0 | |
| 746 data_metric$tmp[data_metric[, nb_name] > 0] <- 1 | |
| 747 } | |
| 748 | |
| 749 if (!is.null(add)) { | |
| 750 metriques <- c(metrique, add[is.element(add, colnames(data_metric))]) | |
| 751 }else{ | |
| 752 metriques <- metrique | |
| 753 } | |
| 754 | |
| 755 ## Subset depending on metrics table | |
| 756 switch(cas_tables[tab_metrique], | |
| 757 ## Observation table by unitobs and species : | |
| 758 unit_sp = { | |
| 759 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), metriques, drop = FALSE], | |
| 760 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])], | |
| 761 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs | |
| 762 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE], | |
| 763 refesp[match(data_metric$species.code[!is.na(data_metric[, metrique])], | |
| 764 refesp$species.code), # ajout des colonnes sélectionnées d'especes | |
| 765 facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE]) | |
| 766 }, | |
| 767 ## Observation table by unitobs, species and size class : | |
| 768 unit_sp_sz = { | |
| 769 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), | |
| 770 c(metriques, "size.class"), drop = FALSE], | |
| 771 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])], | |
| 772 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs | |
| 773 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE], | |
| 774 refesp[match(data_metric$species.code[!is.na(data_metric[, metrique])], | |
| 775 refesp$species.code), # ajout des colonnes sélectionnées d'especes | |
| 776 facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE]) | |
| 777 }, | |
| 778 ## Other cases : | |
| 779 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), metriques, drop = FALSE], | |
| 780 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])], | |
| 781 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs. | |
| 782 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE]) | |
| 783 ) | |
| 784 | |
| 785 sel_col <- which(!is.na(selections)) | |
| 786 if (!is.null(exclude)) { | |
| 787 sel_col <- sel_col[sel_col != exclude] | |
| 788 } | |
| 789 | |
| 790 ## Particular case of size classes : | |
| 791 if (is.element("size.class", colnames(restmp))) { | |
| 792 if (length(grep("^[[:digit:]]*[-_][[:digit:]]*$", unique(as.character(restmp$size.class)), perl = TRUE)) == | |
| 793 length(unique(as.character(restmp$size.class)))) { | |
| 794 restmp[, "size.class"] <- | |
| 795 factor(as.character(restmp$size.class), | |
| 796 levels = unique(as.character(restmp$size.class))[ | |
| 797 order(as.numeric(sub("^([[:digit:]]*)[-_][[:digit:]]*$", | |
| 798 "\\1", | |
| 799 unique(as.character(restmp$size.class)), | |
| 800 perl = TRUE)), | |
| 801 na.last = FALSE)]) | |
| 802 }else{ | |
| 803 restmp[, "size.class"] <- factor(restmp$size.class) | |
| 804 } | |
| 805 } | |
| 806 | |
| 807 ## Biomass and density conversion -> /100m² : | |
| 808 if (any(is.element(colnames(restmp), c("biomass", "density", | |
| 809 "biomass.max", "density.max", | |
| 810 "biomass.sd", "density.sd"))) && obs_type != "fishing") { | |
| 811 restmp[, is.element(colnames(restmp), | |
| 812 c("biomass", "density", | |
| 813 "biomass.max", "density.max", | |
| 814 "biomass.sd", "density.sd"))] <- 100 * | |
| 815 restmp[, is.element(colnames(restmp), | |
| 816 c("biomass", "density", | |
| 817 "biomass.max", "density.max", | |
| 818 "biomass.sd", "density.sd"))] | |
| 819 } | |
| 820 | |
| 821 return(restmp) | |
| 822 } | |
| 823 | |
| 824 ######################################### end of the function subset_all_tables_f | |
| 825 | |
| 826 ######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r | |
| 827 | |
| 828 organise_fact <- function(list_rand, list_fact) { | |
| 829 ## Purpose: organise response factors | |
| 830 ## ---------------------------------------------------------------------- | |
| 831 ## Arguments: list_rand : Analysis random factors list | |
| 832 ## list_fact : Analysis factors list | |
| 833 ## ---------------------------------------------------------------------- | |
| 834 ## Author: Coline ROYAUX 14 november 2020 | |
| 835 | |
| 836 if (list_rand[1] != "None") { | |
| 837 if (all(is.element(list_fact, list_rand)) || list_fact[1] == "None") { | |
| 838 resp_fact <- paste("(1|", paste(list_rand, collapse = ") + (1|"), ")") | |
| 839 list_f <- NULL | |
| 840 list_fact <- list_rand | |
| 841 }else{ | |
| 842 list_f <- list_fact[!is.element(list_fact, list_rand)] | |
| 843 resp_fact <- paste(paste(list_f, collapse = " + "), " + (1|", paste(list_rand, collapse = ") + (1|"), ")") | |
| 844 list_fact <- c(list_f, list_rand) | |
| 845 } | |
| 846 }else{ | |
| 847 list_f <- list_fact | |
| 848 resp_fact <- paste(list_fact, collapse = " + ") | |
| 849 } | |
| 850 return(list(resp_fact, list_f, list_fact)) | |
| 851 } | |
| 852 | |
| 853 ######################################### end of the function organise_fact | |
| 854 | |
| 855 ######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r | |
| 856 distrib_choice <- function(distrib = distrib, metrique = metrique, data = tmpd_ata) { | |
| 857 ## Purpose: choose the right distribution | |
| 858 ## ---------------------------------------------------------------------- | |
| 859 ## Arguments: data : data used for analysis | |
| 860 ## metrique : Chosen metric | |
| 861 ## distrib : distribution law selected by user | |
| 862 ## ---------------------------------------------------------------------- | |
| 863 ## Author: Coline ROYAUX 14 november 2020 | |
| 864 | |
| 865 if (distrib == "None") { | |
| 866 if (metrique == "presence_absence") { | |
| 867 chose_distrib <- "binomial" | |
| 868 }else{ | |
| 869 switch(class(data[, metrique]), | |
| 870 "integer" = { | |
| 871 chose_distrib <- "poisson" | |
| 872 }, | |
| 873 "numeric" = { | |
| 874 chose_distrib <- "gaussian" | |
| 875 }, | |
| 876 stop("Selected metric class doesn't fit, you should select an integer or a numeric variable")) | |
| 877 } | |
| 878 }else{ | |
| 879 chose_distrib <- distrib | |
| 880 } | |
| 881 return(chose_distrib) | |
| 882 } | |
| 883 | |
| 884 ######################################### end of the function organise_fact | |
| 885 | |
| 886 ######################################### start of the function create_res_table called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r | |
| 887 create_res_table <- function(list_rand, list_fact, row, lev, distrib) { | |
| 888 ## Purpose: create results table | |
| 889 ## ---------------------------------------------------------------------- | |
| 890 ## Arguments: list_rand : Analysis random factors list | |
| 891 ## list_fact : Analysis factors list | |
| 892 ## row : rows of results table = species or separation factor | |
| 893 ## lev : Levels of analysis factors list | |
| 894 ## distrib : distribution law | |
| 895 ## ---------------------------------------------------------------------- | |
| 896 ## Author: Coline ROYAUX 04 october 2020 | |
| 897 | |
| 898 if (list_rand[1] != "None") { ## if random effects | |
| 899 tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, BIC = NA, logLik = NA, deviance = NA, df.resid = NA) | |
| 900 colrand <- unlist(lapply(list_rand, | |
| 901 FUN = function(x) { | |
| 902 lapply(c("Std.Dev", "NbObservation", "NbLevels"), | |
| 903 FUN = function(y) { | |
| 904 paste(x, y, collapse = ":") | |
| 905 }) | |
| 906 })) | |
| 907 tab_sum[, colrand] <- NA | |
| 908 | |
| 909 if (! is.null(lev)) { ## if fixed effects + random effects | |
| 910 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
| 911 FUN = function(x) { | |
| 912 lapply(c("Estimate", "Std.Err", "Zvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
| 913 FUN = function(y) { | |
| 914 paste(x, y, collapse = ":") | |
| 915 }) | |
| 916 })) | |
| 917 | |
| 918 }else{ ## if no fixed effects | |
| 919 colcoef <- NULL | |
| 920 } | |
| 921 | |
| 922 }else{ ## if no random effects | |
| 923 tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, Resid.deviance = NA, df.resid = NA, Null.deviance = NA, df.null = NA) | |
| 924 | |
| 925 switch(distrib, | |
| 926 "gaussian" = { | |
| 927 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
| 928 FUN = function(x) { | |
| 929 lapply(c("Estimate", "Std.Err", "Tvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
| 930 FUN = function(y) { | |
| 931 paste(x, y, collapse = ":") | |
| 932 }) | |
| 933 })) | |
| 934 | |
| 935 }, | |
| 936 "quasipoisson" = { | |
| 937 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
| 938 FUN = function(x) { | |
| 939 lapply(c("Estimate", "Std.Err", "Tvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
| 940 FUN = function(y) { | |
| 941 paste(x, y, collapse = ":") | |
| 942 }) | |
| 943 })) | |
| 944 | |
| 945 } | |
| 946 , { | |
| 947 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
| 948 FUN = function(x) { | |
| 949 lapply(c("Estimate", "Std.Err", "Zvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
| 950 FUN = function(y) { | |
| 951 paste(x, y, collapse = ":") | |
| 952 }) | |
| 953 })) | |
| 954 }) | |
| 955 | |
| 956 } | |
| 957 | |
| 958 tab_sum[, colcoef] <- NA | |
| 959 | |
| 960 | |
| 961 return(tab_sum) | |
| 962 } | |
| 963 ######################################### end of the function create_res_table | |
| 964 | |
| 965 ######################################### start of the function sorties_lm_f called by glm_community in FunctExeCalcGLMGalaxy.r | |
| 966 sorties_lm_f <- function(obj_lm, obj_lmy, tab_sum, #formule, | |
| 967 metrique, fact_ana, cut, col_ana, list_fact, list_rand, lev = NULL, d_ata, | |
| 968 log = FALSE, sufixe = NULL) { | |
| 969 ## Purpose: Form GLM and LM results | |
| 970 ## ---------------------------------------------------------------------- | |
| 971 ## Arguments: obj_lm : lm object | |
| 972 ## obj_lmy : lm object with year as continuous | |
| 973 ## tab_sum : output summary table | |
| 974 ## formule : LM formula | |
| 975 ## metrique : Chosen metric | |
| 976 ## fact_ana : separation factor | |
| 977 ## cut : level of separation factor | |
| 978 ## col_ana : colname for separation factor in output summary table | |
| 979 ## list_fact : Analysis factors list | |
| 980 ## list_rand : Analysis random factors list | |
| 981 ## levels : Levels of analysis factors list | |
| 982 ## d_ata : d_ata used for analysis | |
| 983 ## log : put log on metric ? (boolean) | |
| 984 ## sufixe : sufix for file name | |
| 985 ## ---------------------------------------------------------------------- | |
| 986 ## Author: Yves Reecht, Date: 25 août 2010, 16:19 modified by Coline ROYAUX 04 june 2020 | |
| 987 | |
| 988 tab_sum[, "Interest.var"] <- as.character(metrique) | |
| 989 sum_lm <- summary(obj_lm) | |
| 990 tab_sum[, "distribution"] <- as.character(sum_lm$family[1]) | |
| 991 | |
| 992 if (length(grep("^glmmTMB", obj_lm$call)) > 0) { #if random effects | |
| 993 tab_sum[tab_sum[, col_ana] == cut, "AIC"] <- sum_lm$AICtab[1] | |
| 994 tab_sum[tab_sum[, col_ana] == cut, "BIC"] <- sum_lm$AICtab[2] | |
| 995 tab_sum[tab_sum[, col_ana] == cut, "logLik"] <- sum_lm$AICtab[3] | |
| 996 tab_sum[tab_sum[, col_ana] == cut, "deviance"] <- sum_lm$AICtab[4] | |
| 997 tab_sum[tab_sum[, col_ana] == cut, "df.resid"] <- sum_lm$AICtab[5] | |
| 998 | |
| 999 if (! is.null(lev)) { ## if fixed effects + random effects | |
| 1000 tab_coef <- as.data.frame(sum_lm$coefficients$cond) | |
| 1001 tab_coef$signif <- lapply(tab_coef[, "Pr(>|z|)"], FUN = function(x) { | |
| 1002 if (!is.na(x) && x < 0.05) { | |
| 1003 "yes" | |
| 1004 }else{ | |
| 1005 "no" | |
| 1006 } | |
| 1007 }) | |
| 1008 | |
| 1009 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Zvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "z value"] | |
| 1010 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|z|)"] | |
| 1011 | |
| 1012 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Zvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1013 if (length(grep(x, rownames(tab_coef))) > 0) { | |
| 1014 tab_coef[grepl(x, rownames(tab_coef)), "z value"] | |
| 1015 }else{ | |
| 1016 NA | |
| 1017 } | |
| 1018 })) | |
| 1019 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1020 if (length(grep(x, rownames(tab_coef))) > 0) { | |
| 1021 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|z|)"] | |
| 1022 }else{ | |
| 1023 NA | |
| 1024 } | |
| 1025 })) | |
| 1026 | |
| 1027 if (any(obj_lmy != "")) { | |
| 1028 sum_lmy <- summary(obj_lmy) | |
| 1029 tab_coefy <- as.data.frame(sum_lmy$coefficients$cond) | |
| 1030 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|z|)"], FUN = function(x) { | |
| 1031 if (!is.na(x) && x < 0.05) { | |
| 1032 "yes" | |
| 1033 }else{ | |
| 1034 "no" | |
| 1035 } | |
| 1036 }) | |
| 1037 tab_sum[tab_sum[, col_ana] == cut, "year Zvalue"] <- ifelse(length(tab_coefy["year", "z value"]) > 0, tab_coefy["year", "z value"], NA) | |
| 1038 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|z|)"], NA) | |
| 1039 } | |
| 1040 | |
| 1041 } | |
| 1042 | |
| 1043 switch(as.character(length(sum_lm$varcor$cond)), | |
| 1044 "1" = { | |
| 1045 std_d <- c(sum_lm$varcor$cond[[1]]) | |
| 1046 }, | |
| 1047 "2" = { | |
| 1048 std_d <- c(sum_lm$varcor$cond[[1]], sum_lm$varcor$cond[[2]]) | |
| 1049 }, | |
| 1050 std_d <- NULL) | |
| 1051 | |
| 1052 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "Std.Dev", collapse = "|"), colnames(tab_sum))] <- std_d | |
| 1053 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "NbObservation", collapse = "|"), colnames(tab_sum))] <- sum_lm$nobs | |
| 1054 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "NbLevels", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(list_rand, FUN = function(x) { | |
| 1055 nlevels(d_ata[, x]) | |
| 1056 })) | |
| 1057 | |
| 1058 }else{ ## if fixed effects only | |
| 1059 | |
| 1060 tab_sum[tab_sum[, col_ana] == cut, "AIC"] <- sum_lm$aic | |
| 1061 tab_sum[tab_sum[, col_ana] == cut, "Resid.deviance"] <- sum_lm$deviance | |
| 1062 tab_sum[tab_sum[, col_ana] == cut, "df.resid"] <- sum_lm$df.residual | |
| 1063 tab_sum[tab_sum[, col_ana] == cut, "Null.deviance"] <- sum_lm$null.deviance | |
| 1064 tab_sum[tab_sum[, col_ana] == cut, "df.null"] <- sum_lm$df.null | |
| 1065 tab_coef <- as.data.frame(sum_lm$coefficients) | |
| 1066 | |
| 1067 if (any(obj_lmy != "")) { | |
| 1068 sum_lmy <- summary(obj_lmy) | |
| 1069 tab_coefy <- as.data.frame(sum_lmy$coefficients) | |
| 1070 } | |
| 1071 | |
| 1072 if (sum_lm$family[1] == "gaussian" || sum_lm$family[1] == "quasipoisson") { | |
| 1073 | |
| 1074 tab_coef$signif <- lapply(tab_coef[, "Pr(>|t|)"], FUN = function(x) { | |
| 1075 if (!is.na(x) && x < 0.05) { | |
| 1076 "yes" | |
| 1077 }else{ | |
| 1078 "no" | |
| 1079 } | |
| 1080 }) | |
| 1081 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Tvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "t value"] | |
| 1082 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|t|)"] | |
| 1083 | |
| 1084 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Tvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1085 if (length(grep(x, rownames(tab_coef))) > 0) { | |
| 1086 tab_coef[grepl(x, rownames(tab_coef)), "t value"] | |
| 1087 }else{ | |
| 1088 NA | |
| 1089 } | |
| 1090 })) | |
| 1091 | |
| 1092 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1093 if (length(grep(x, rownames(tab_coef))) > 0) { | |
| 1094 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|t|)"] | |
| 1095 }else{ | |
| 1096 NA | |
| 1097 } | |
| 1098 })) | |
| 1099 | |
| 1100 if (any(obj_lmy != "")) { | |
| 1101 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|t|)"], FUN = function(x) { | |
| 1102 if (!is.na(x) && x < 0.05) { | |
| 1103 "yes" | |
| 1104 }else{ | |
| 1105 "no" | |
| 1106 } | |
| 1107 }) | |
| 1108 tab_sum[tab_sum[, col_ana] == cut, "year Tvalue"] <- ifelse(length(tab_coefy["year", "t value"]) > 0, tab_coefy["year", "t value"], NA) | |
| 1109 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|t|)"], NA) | |
| 1110 } | |
| 1111 | |
| 1112 }else{ | |
| 1113 tab_coef$signif <- lapply(tab_coef[, "Pr(>|z|)"], FUN = function(x) { | |
| 1114 if (!is.na(x) && x < 0.05) { | |
| 1115 "yes" | |
| 1116 }else{ | |
| 1117 "no" | |
| 1118 } | |
| 1119 }) | |
| 1120 | |
| 1121 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Zvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "z value"] | |
| 1122 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|z|)"] | |
| 1123 | |
| 1124 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Zvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1125 if (length(grep(x, rownames(tab_coef))) > 0) { | |
| 1126 tab_coef[grepl(x, rownames(tab_coef)), "z value"] | |
| 1127 }else{ | |
| 1128 NA | |
| 1129 } | |
| 1130 })) | |
| 1131 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1132 if (length(grep(x, rownames(tab_coef))) > 0) { | |
| 1133 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|z|)"] | |
| 1134 }else{ | |
| 1135 NA | |
| 1136 } | |
| 1137 })) | |
| 1138 | |
| 1139 if (any(obj_lmy != "")) { | |
| 1140 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|z|)"], FUN = function(x) { | |
| 1141 if (!is.na(x) && x < 0.05) { | |
| 1142 "yes" | |
| 1143 }else{ | |
| 1144 "no" | |
| 1145 } | |
| 1146 }) | |
| 1147 | |
| 1148 tab_sum[tab_sum[, col_ana] == cut, "year Zvalue"] <- ifelse(length(tab_coefy["year", "z value"]) > 0, tab_coefy["year", "z value"], NA) | |
| 1149 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|z|)"], NA) | |
| 1150 } | |
| 1151 } | |
| 1152 } | |
| 1153 | |
| 1154 if (! is.null(lev)) { ## if fixed effects | |
| 1155 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Estimate", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Estimate"] | |
| 1156 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Std.Err", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Std. Error"] | |
| 1157 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*signif", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "signif"] | |
| 1158 | |
| 1159 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Estimate", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1160 if (length(grep(x, rownames(tab_coef))) > 0) { | |
| 1161 tab_coef[grepl(x, rownames(tab_coef)), "Estimate"] | |
| 1162 }else{ | |
| 1163 NA | |
| 1164 } | |
| 1165 })) | |
| 1166 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Std.Err", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1167 if (length(grep(x, rownames(tab_coef))) > 0) { | |
| 1168 tab_coef[grepl(x, rownames(tab_coef)), "Std. Error"] | |
| 1169 }else{ | |
| 1170 NA | |
| 1171 } | |
| 1172 })) | |
| 1173 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "signif", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1174 if (length(grep(x, rownames(tab_coef))) > 0) { | |
| 1175 tab_coef[grepl(x, rownames(tab_coef)), "signif"] | |
| 1176 }else{ | |
| 1177 NA | |
| 1178 } | |
| 1179 })) | |
| 1180 | |
| 1181 if (any(obj_lmy != "")) { | |
| 1182 tab_sum[tab_sum[, col_ana] == cut, "year Estimate"] <- ifelse(length(tab_coefy["year", "Estimate"]) > 0, tab_coefy["year", "Estimate"], NA) | |
| 1183 tab_sum[tab_sum[, col_ana] == cut, "year Std.Err"] <- ifelse(length(tab_coefy["year", "Std. Error"]) > 0, tab_coefy["year", "Std. Error"], NA) | |
| 1184 tab_sum[tab_sum[, col_ana] == cut, "year signif"] <- ifelse(length(tab_coefy["year", "signif"]) > 0, tab_coefy["year", "signif"], NA) | |
| 1185 } | |
| 1186 | |
| 1187 } | |
| 1188 | |
| 1189 ic <- tryCatch(as.data.frame(confint(obj_lm)), error = function(e) { | |
| 1190 }) | |
| 1191 | |
| 1192 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "IC_up", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1193 if (length(grep(x, rownames(ic))) > 0) { | |
| 1194 ic[grepl(x, rownames(ic)), "97.5 %"] | |
| 1195 }else{ | |
| 1196 NA | |
| 1197 } | |
| 1198 })) | |
| 1199 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "IC_inf", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
| 1200 if (length(grep(x, rownames(ic))) > 0) { | |
| 1201 ic[grepl(x, rownames(ic)), "2.5 %"] | |
| 1202 }else{ | |
| 1203 NA | |
| 1204 } | |
| 1205 })) | |
| 1206 | |
| 1207 return(tab_sum) | |
| 1208 | |
| 1209 } | |
| 1210 | |
| 1211 | |
| 1212 ######################################### end of the function sorties_lm_f | |
| 1213 | |
| 1214 | |
| 1215 ######################################### start of the function note_glm_f called by glm_species and glm_community | |
| 1216 | |
| 1217 note_glm_f <- function(data, obj_lm, metric, list_fact, details = FALSE) { | |
| 1218 ## Purpose: Note your GLM analysis | |
| 1219 ## ---------------------------------------------------------------------- | |
| 1220 ## Arguments: data : d_ataframe used for analysis | |
| 1221 ## obj_lm : GLM assessed | |
| 1222 ## metric : selected metric | |
| 1223 ## list_fact : Analysis factors list | |
| 1224 ## details : detailed output ? | |
| 1225 ## ---------------------------------------------------------------------- | |
| 1226 ## Author: Coline ROYAUX, 26 june 2020 | |
| 1227 | |
| 1228 rate <- 0 | |
| 1229 detres <- list(complete_plan = NA, balanced_plan = NA, NA_proportion_OK = NA, no_residual_dispersion = NA, uniform_residuals = NA, outliers_proportion_OK = NA, no_zero_inflation = NA, observation_factor_ratio_OK = NA, enough_levels_random_effect = NA, rate = NA) | |
| 1230 | |
| 1231 #### d_ata criterions #### | |
| 1232 | |
| 1233 ## Plan | |
| 1234 | |
| 1235 plan <- as.data.frame(table(data[, list_fact])) | |
| 1236 | |
| 1237 if (nrow(plan[plan$Freq == 0, ]) < nrow(plan) * 0.1) { # +0.5 if less than 10% of possible factor's level combinations aren't represented in the sampling scheme | |
| 1238 rate <- rate + 0.5 | |
| 1239 detres$complete_plan <- TRUE | |
| 1240 | |
| 1241 if (summary(as.factor(plan$Freq))[1] > nrow(plan) * 0.9) { # +0.5 if the frequency of the most represented frequency of possible factor's levels combinations is superior to 90% of the total number of possible factor's levels combinations | |
| 1242 rate <- rate + 0.5 | |
| 1243 detres$balanced_plan <- TRUE | |
| 1244 } | |
| 1245 | |
| 1246 }else{ | |
| 1247 detres$complete_plan <- FALSE | |
| 1248 detres$balanced_plan <- FALSE | |
| 1249 } | |
| 1250 | |
| 1251 if (nrow(data) - nrow(na.omit(data)) < nrow(data) * 0.1) { # +1 if less than 10% of the lines in the dataframe bares a NA | |
| 1252 rate <- rate + 1 | |
| 1253 detres["NA_proportion_OK"] <- TRUE | |
| 1254 }else{ | |
| 1255 detres["NA_proportion_OK"] <- FALSE | |
| 1256 } | |
| 1257 | |
| 1258 #### Model criterions #### | |
| 1259 | |
| 1260 if (length(grep("quasi", obj_lm$family)) == 0) { #DHARMa doesn't work with quasi distributions | |
| 1261 | |
| 1262 residuals <- DHARMa::simulateResiduals(obj_lm) | |
| 1263 | |
| 1264 capture.output(test_res <- DHARMa::testResiduals(residuals)) | |
| 1265 test_zero <- DHARMa::testZeroInflation(residuals) | |
| 1266 | |
| 1267 ## dispersion of residuals | |
| 1268 | |
| 1269 if (test_res$dispersion$p.value > 0.05) { # +1.5 if dispersion tests not significative | |
| 1270 rate <- rate + 1.5 | |
| 1271 detres$no_residual_dispersion <- TRUE | |
| 1272 }else{ | |
| 1273 detres$no_residual_dispersion <- FALSE | |
| 1274 } | |
| 1275 | |
| 1276 ## uniformity of residuals | |
| 1277 | |
| 1278 if (test_res$uniformity$p.value > 0.05) { # +1 if uniformity tests not significative | |
| 1279 rate <- rate + 1 | |
| 1280 detres$uniform_residuals <- TRUE | |
| 1281 }else{ | |
| 1282 detres$uniform_residuals <- FALSE | |
| 1283 } | |
| 1284 | |
| 1285 ## residuals outliers | |
| 1286 | |
| 1287 if (test_res$outliers$p.value > 0.05) { # +0.5 if outliers tests not significative | |
| 1288 rate <- rate + 0.5 | |
| 1289 detres["outliers_proportion_OK"] <- TRUE | |
| 1290 }else{ | |
| 1291 detres["outliers_proportion_OK"] <- FALSE | |
| 1292 } | |
| 1293 | |
| 1294 ## Zero inflation test | |
| 1295 | |
| 1296 if (test_zero$p.value > 0.05) { # +1 if zero inflation tests not significative | |
| 1297 rate <- rate + 1 | |
| 1298 detres$no_zero_inflation <- TRUE | |
| 1299 }else{ | |
| 1300 detres$no_zero_inflation <- FALSE | |
| 1301 } | |
| 1302 | |
| 1303 ## Factors/observations ratio | |
| 1304 | |
| 1305 if (length(list_fact) / nrow(na.omit(data)) < 0.1) { # +1 if quantity of factors is less than 10% of the quantity of observations | |
| 1306 rate <- rate + 1 | |
| 1307 detres["observation_factor_ratio_OK"] <- TRUE | |
| 1308 }else{ | |
| 1309 detres["observation_factor_ratio_OK"] <- FALSE | |
| 1310 } | |
| 1311 | |
| 1312 ## less than 10 factors' level on random effect | |
| 1313 | |
| 1314 if (length(grep("^glmmTMB", obj_lm$call)) > 0) { | |
| 1315 nlev_rand <- c() | |
| 1316 for (fact in names(summary(obj_lm)$varcor$cond)) { | |
| 1317 nlev_rand <- c(nlev_rand, length(unlist(unique(data[, fact])))) | |
| 1318 } | |
| 1319 | |
| 1320 if (all(nlev_rand > 10)) { # +1 if more than 10 levels in one random effect | |
| 1321 rate <- rate + 1 | |
| 1322 detres$enough_levels_random_effect <- TRUE | |
| 1323 }else{ | |
| 1324 detres$enough_levels_random_effect <- FALSE | |
| 1325 } | |
| 1326 } | |
| 1327 | |
| 1328 detres$rate <- rate | |
| 1329 | |
| 1330 if (details) { | |
| 1331 return(detres) | |
| 1332 }else{ | |
| 1333 return(rate) | |
| 1334 } | |
| 1335 | |
| 1336 }else{ | |
| 1337 return(NA) | |
| 1338 cat("Models with quasi distributions can't be rated for now") | |
| 1339 } | |
| 1340 } | |
| 1341 | |
| 1342 ######################################### end of the function note_glm_f | |
| 1343 | |
| 1344 ######################################### start of the function note_glms_f called by glm_species and glm_community | |
| 1345 | |
| 1346 note_glms_f <- function(tab_rate, expr_lm, obj_lm, file_out = FALSE) { | |
| 1347 ## Purpose: Note your GLM analysis | |
| 1348 ## ---------------------------------------------------------------------- | |
| 1349 ## Arguments: tab_rate : rates table from note_glm_f | |
| 1350 ## expr_lm : GLM expression assessed | |
| 1351 ## obj_lm : GLM object | |
| 1352 ## file_out : Output as file ? else global rate only | |
| 1353 ## ---------------------------------------------------------------------- | |
| 1354 ## Author: Coline ROYAUX, 26 june 2020 | |
| 1355 namefile <- "RatingGLM.txt" | |
| 1356 | |
| 1357 if (length(grep("quasi", obj_lm$family)) == 0) { #DHARMa doesn't work with quasi distributions | |
| 1358 | |
| 1359 rate_m <- median(na.omit(tab_rate[, "rate"])) | |
| 1360 sum <- summary(obj_lm) | |
| 1361 | |
| 1362 if (length(grep("^glmmTMB", obj_lm$call)) > 0) { | |
| 1363 if (median(na.omit(tab_rate[, "rate"])) >= 6) { # if 50% has a rate superior or equal to 6 +1 | |
| 1364 rate_m <- rate_m + 1 | |
| 1365 } | |
| 1366 | |
| 1367 if (quantile(na.omit(tab_rate[, "rate"]), probs = 0.9) >= 6) { # if 90% has a rate superior or equal to 6 +1 | |
| 1368 rate_m <- rate_m + 1 | |
| 1369 } | |
| 1370 }else{ | |
| 1371 if (median(na.omit(tab_rate[, "rate"])) >= 5) { # if 50% has a rate superior or equal to 5 +1 | |
| 1372 rate_m <- rate_m + 1 | |
| 1373 } | |
| 1374 | |
| 1375 if (quantile(na.omit(tab_rate[, "rate"]), probs = 0.9) >= 5) { # if 90% has a rate superior or equal to 5 +1 | |
| 1376 rate_m <- rate_m + 1 | |
| 1377 } | |
| 1378 } | |
| 1379 | |
| 1380 if (file_out) { | |
| 1381 | |
| 1382 cat("###########################################################################", | |
| 1383 "\n########################### Analysis evaluation ###########################", | |
| 1384 "\n###########################################################################", file = namefile, fill = 1, append = TRUE) | |
| 1385 | |
| 1386 ## Informations on model : | |
| 1387 cat("\n\n######################################### \nFitted model:", file = namefile, fill = 1, append = TRUE) | |
| 1388 cat("\t", deparse(expr_lm), "\n\n", file = namefile, sep = "", append = TRUE) | |
| 1389 cat("Family: ", sum$family[[1]], | |
| 1390 file = namefile, append = TRUE) | |
| 1391 cat("\n\nNumber of analysis: ", nrow(tab_rate), file = namefile, append = TRUE) | |
| 1392 | |
| 1393 ## Global rate : | |
| 1394 cat("\n\n######################################### \nGlobal rate for all analysis:", | |
| 1395 "\n\n", rate_m, "out of 10", file = namefile, append = TRUE) | |
| 1396 | |
| 1397 ## details on every GLM : | |
| 1398 | |
| 1399 cat("\n\n######################################### \nDetails on every analysis:\n\n", file = namefile, append = TRUE) | |
| 1400 cat("Analysis\tC1\tC2\tC3\tC4\tC5\tC6\tC7\tC8\tC9\tFinal rate", file = namefile, append = TRUE) | |
| 1401 apply(tab_rate, 1, FUN = function(x) { | |
| 1402 | |
| 1403 if (!is.na(x["complete_plan"]) && x["complete_plan"] == TRUE) { | |
| 1404 cat("\n", x[1], "\tyes", file = namefile, append = TRUE) | |
| 1405 }else{ | |
| 1406 cat("\n", x[1], "\tno", file = namefile, append = TRUE) | |
| 1407 } | |
| 1408 | |
| 1409 for (i in c("balanced_plan", "NA_proportion_OK", "no_residual_dispersion", "uniform_residuals", "outliers_proportion_OK", "no_zero_inflation", "observation_factor_ratio_OK", "enough_levels_random_effect")) { | |
| 1410 if (!is.na(x[i]) && x[i] == TRUE) { | |
| 1411 cat("\tyes", file = namefile, append = TRUE) | |
| 1412 }else{ | |
| 1413 cat("\tno", file = namefile, append = TRUE) | |
| 1414 } | |
| 1415 } | |
| 1416 | |
| 1417 cat("\t", x["rate"], "/ 8", file = namefile, append = TRUE) | |
| 1418 | |
| 1419 | |
| 1420 }) | |
| 1421 cat("\n\nC1: Complete plan?\nC2: Balanced plan?\nC3: Few NA?\nC4: Regular dispersion?\nC5: Uniform residuals?\nC6: Regular outliers proportion?\nC7: No zero-inflation?\nC8: Good observation/factor ratio?\nC9: Enough levels on random effect?", file = namefile, append = TRUE) | |
| 1422 | |
| 1423 ## Red flags - advice : | |
| 1424 cat("\n\n######################################### \nRed flags - advice:\n\n", file = namefile, append = TRUE) | |
| 1425 if (all(na.omit(tab_rate["NA_proportion_OK"]) == FALSE)) { | |
| 1426 cat("\n", "\t- More than 10% of lines of your dataset contains NAs", file = namefile, append = TRUE) | |
| 1427 } | |
| 1428 | |
| 1429 if (length(grep("FALSE", tab_rate["no_residual_dispersion"])) / length(na.omit(tab_rate["no_residual_dispersion"])) > 0.5) { | |
| 1430 cat("\n", "\t- More than 50% of your analyses are over- or under- dispersed : Try with another distribution family", file = namefile, append = TRUE) | |
| 1431 } | |
| 1432 | |
| 1433 if (length(grep("FALSE", tab_rate["uniform_residuals"])) / length(na.omit(tab_rate["uniform_residuals"])) > 0.5) { | |
| 1434 cat("\n", "\t- More than 50% of your analyses haven't an uniform distribution of residuals : Try with another distribution family", file = namefile, append = TRUE) | |
| 1435 } | |
| 1436 | |
| 1437 if (length(grep("FALSE", tab_rate["outliers_proportion_OK"])) / length(na.omit(tab_rate["outliers_proportion_OK"])) > 0.5) { | |
| 1438 cat("\n", "\t- More than 50% of your analyses have too much outliers : Try with another distribution family or try to select or filter your data", file = namefile, append = TRUE) | |
| 1439 } | |
| 1440 | |
| 1441 if (length(grep("FALSE", tab_rate["no_zero_inflation"])) / length(na.omit(tab_rate["no_zero_inflation"])) > 0.5) { | |
| 1442 cat("\n", "\t- More than 50% of your analyses have zero inflation : Try to select or filter your data", file = namefile, append = TRUE) | |
| 1443 } | |
| 1444 | |
| 1445 if (length(grep("FALSE", tab_rate["observation_factor_ratio_OK"])) / length(na.omit(tab_rate["observation_factor_ratio_OK"])) > 0.5) { | |
| 1446 cat("\n", "\t- More than 50% of your analyses have not enough observations for the amount of factors : Try to use less factors in your analysis or try to use another separation factor", file = namefile, append = TRUE) | |
| 1447 } | |
| 1448 | |
| 1449 if (any(tab_rate["enough_levels_random_effect"] == FALSE, na.rm = TRUE) && length(grep("^glmmTMB", obj_lm$call)) > 0) { | |
| 1450 cat("\n", "\t- Random effect hasn't enough levels to be robust : If it has less than ten levels remove the random effect", file = namefile, append = TRUE) | |
| 1451 } | |
| 1452 }else{ | |
| 1453 | |
| 1454 return(rate_m) | |
| 1455 | |
| 1456 } | |
| 1457 }else{ | |
| 1458 cat("Models with quasi distributions can't be rated for now", file = namefile, append = TRUE) | |
| 1459 } | |
| 1460 } | |
| 1461 | |
| 1462 ######################################### end of the function note_glm_f | |
| 1463 | |
| 1464 ######################################### start of the function info_stats_f called by glm_species and glm_community | |
| 1465 | |
| 1466 info_stats_f <- function(filename, d_ata, agreg_level = c("species", "unitobs"), type = c("graph", "stat"), | |
| 1467 metrique, fact_graph, fact_graph_sel, list_fact, list_fact_sel) { | |
| 1468 ## Purpose: informations and simple statistics | |
| 1469 ## ---------------------------------------------------------------------- | |
| 1470 ## Arguments: filename : name of file | |
| 1471 ## d_ata : input data | |
| 1472 ## agreg_level : aggregation level | |
| 1473 ## type : type of function calling | |
| 1474 ## metrique : selected metric | |
| 1475 ## fact_graph : selection factor | |
| 1476 ## fact_graph_sel : list of factors levels selected for this factor | |
| 1477 ## list_fact : list of grouping factors | |
| 1478 ## list_fact_sel : list of factors levels selected for these factors | |
| 1479 ## ---------------------------------------------------------------------- | |
| 1480 ## Author: Yves Reecht, Date: 10 sept. 2012, 15:26 modified by Coline ROYAUX 04 june 2020 | |
| 1481 | |
| 1482 ## Open file : | |
| 1483 f_ile <- file(description = filename, | |
| 1484 open = "w", encoding = "latin1") | |
| 1485 | |
| 1486 ## if error : | |
| 1487 on.exit(if (exists("filename") && | |
| 1488 tryCatch(isOpen(f_ile), | |
| 1489 error = function(e)return(FALSE))) close(f_ile)) | |
| 1490 | |
| 1491 ## Metrics and factors infos : | |
| 1492 print_selection_info_f(metrique = metrique, #fact_graph = fact_graph, fact_graph_sel = fact_graph_sel, | |
| 1493 list_fact = list_fact, #list_fact_sel = list_fact_sel, | |
| 1494 f_ile = f_ile, | |
| 1495 agreg_level = agreg_level, type = type) | |
| 1496 | |
| 1497 ## statistics : | |
| 1498 if (class(d_ata) == "list") { | |
| 1499 cat("\n###################################################", | |
| 1500 "\nStatistics per level of splitting factor:\n", | |
| 1501 sep = "", file = f_ile, append = TRUE) | |
| 1502 | |
| 1503 invisible(sapply(seq_len(length(d_ata)), | |
| 1504 function(i) { | |
| 1505 print_stats_f(d_ata = d_ata[[i]], metrique = metrique, list_fact = list_fact, f_ile = f_ile, | |
| 1506 headline = fact_graph_sel[i]) | |
| 1507 })) | |
| 1508 }else{ | |
| 1509 print_stats_f(d_ata = d_ata, metrique = metrique, list_fact = list_fact, f_ile = f_ile, | |
| 1510 headline = NULL) | |
| 1511 } | |
| 1512 | |
| 1513 ## Close file : | |
| 1514 close(f_ile) | |
| 1515 | |
| 1516 } | |
| 1517 | |
| 1518 ######################################### end of the function info_stats_f | |
| 1519 | |
| 1520 | |
| 1521 ######################################### start of the function print_selection_info_f called by info_stats_f | |
| 1522 | |
| 1523 print_selection_info_f <- function(metrique, list_fact, | |
| 1524 f_ile, | |
| 1525 agreg_level = c("species", "unitobs"), type = c("graph", "stat")) { | |
| 1526 ## Purpose: Write data informations | |
| 1527 ## ---------------------------------------------------------------------- | |
| 1528 ## Arguments: metrique : chosen metric | |
| 1529 ## list_fact : factor's list | |
| 1530 ## f_ile : Results file name | |
| 1531 ## agreg_level : aggregation level | |
| 1532 ## type : function type | |
| 1533 ## ---------------------------------------------------------------------- | |
| 1534 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:41 modified by Coline ROYAUX 04 june 2020 | |
| 1535 | |
| 1536 cat("\n##################################################\n", | |
| 1537 "Metrics and factors (and possible units/selections):\n", | |
| 1538 sep = "", file = f_ile, append = TRUE) | |
| 1539 | |
| 1540 ## metric info : | |
| 1541 cat("\n Metrics:", metrique, | |
| 1542 "\n", file = f_ile, append = TRUE) | |
| 1543 | |
| 1544 ## Clustering factors : | |
| 1545 if (is.element(agreg_level, c("spCL_unitobs", "spCL_espece", "spSpecies", "spEspece", | |
| 1546 "spUnitobs", "spUnitobs(CL)"))) { | |
| 1547 type <- "spatialGraph" | |
| 1548 } | |
| 1549 | |
| 1550 cat(switch(type, | |
| 1551 "graph" = "\nGrouping factor(s): \n * ", | |
| 1552 "stat" = "\nAnalyses factor(s): \n * ", | |
| 1553 "spatialGraph" = "\nSpatial aggregation factor(s): \n * "), | |
| 1554 paste(list_fact, collaspe = "\n * "), "\n", file = f_ile, append = TRUE) | |
| 1555 | |
| 1556 } | |
| 1557 | |
| 1558 ######################################### end of the function print_selection_info_f | |
| 1559 | |
| 1560 | |
| 1561 ######################################### start of the function print_stats_f called by info_stats_f | |
| 1562 | |
| 1563 print_stats_f <- function(d_ata, metrique, list_fact, f_ile, headline = NULL) { | |
| 1564 ## Purpose: Write general statistics table | |
| 1565 ## ---------------------------------------------------------------------- | |
| 1566 ## Arguments: d_ata : Analysis data | |
| 1567 ## metrique : metric's name | |
| 1568 ## list_fact : Factor's list | |
| 1569 ## f_ile : Simple statistics file name | |
| 1570 ## ---------------------------------------------------------------------- | |
| 1571 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:09 modified by Coline ROYAUX 04 june 2020 | |
| 1572 | |
| 1573 ## Header : | |
| 1574 if (! is.null(headline)) { | |
| 1575 cat("\n", rep("#", nchar(headline) + 3), "\n", | |
| 1576 "## ", headline, "\n", | |
| 1577 sep = "", file = f_ile, append = TRUE) | |
| 1578 } | |
| 1579 | |
| 1580 cat("\n########################\nBase statistics:\n\n", file = f_ile, append = TRUE) | |
| 1581 | |
| 1582 capture.output(print(summary_fr(d_ata[, metrique])), file = f_ile, append = TRUE) | |
| 1583 | |
| 1584 if (! is.null(list_fact)) { | |
| 1585 cat("\n#########################################", | |
| 1586 "\nStatistics per combination of factor levels:\n\n", file = f_ile, sep = "", append = TRUE) | |
| 1587 | |
| 1588 ## Compute summary for each existing factor's cross : | |
| 1589 res <- with(d_ata, | |
| 1590 tapply(eval(parse(text = metrique)), | |
| 1591 INDEX = do.call(paste, | |
| 1592 c(lapply(list_fact, | |
| 1593 function(y)eval(parse(text = y))), | |
| 1594 sep = ".")), | |
| 1595 FUN = summary_fr)) | |
| 1596 | |
| 1597 ## results in table | |
| 1598 capture.output(print(do.call(rbind, res)), | |
| 1599 file = f_ile, append = TRUE) | |
| 1600 } | |
| 1601 | |
| 1602 ## empty line : | |
| 1603 cat("\n", file = f_ile, append = TRUE) | |
| 1604 } | |
| 1605 | |
| 1606 ######################################### end of the function print_stats_f | |
| 1607 | |
| 1608 | |
| 1609 ######################################### start of the function summary_fr called by print_stats_f | |
| 1610 summary_fr <- function(object, digits = max(3, getOption("digits") - 3), ...) { | |
| 1611 ## Purpose: Adding SD and N to summary | |
| 1612 ## ---------------------------------------------------------------------- | |
| 1613 ## Arguments: object : Object to summarise | |
| 1614 ## ---------------------------------------------------------------------- | |
| 1615 ## Author: Yves Reecht, Date: 13 sept. 2012, 15:47 modified by Coline ROYAUX 04 june 2020 | |
| 1616 | |
| 1617 if (! is.numeric(object)) stop("Programming error") | |
| 1618 | |
| 1619 ## Compute summary : | |
| 1620 res <- c(summary(object = object, digits, ...), "sd" = signif(sd(x = object), digits = digits), "N" = length(object)) | |
| 1621 | |
| 1622 return(res) | |
| 1623 } | |
| 1624 | |
| 1625 ######################################### start of the function summary_fr |
