comparison FunctPAMPAGalaxy.r @ 0:3ab852a7ff53 draft

"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 04381ca7162ec3ec68419e308194b91d11cacb04"
author ecology
date Mon, 16 Nov 2020 11:02:43 +0000
parents
children 5ddca052c314
comparison
equal deleted inserted replaced
-1:000000000000 0:3ab852a7ff53
1 #Rscript
2
3
4 ##################################################################################################################################
5 ####################### PAMPA Galaxy tools functions : Calculate metrics, compute GLM and plot #################################
6 ##################################################################################################################################
7
8 #### Based on Yves Reecht R script
9 #### Modified by Coline ROYAUX for integrating within Galaxy-E
10
11 ######################################### start of the function fact_det_f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
12 ####### Define the finest aggregation with the observation table
13
fact_det_f <- function(obs,
                       size_class = "size.class",
                       code_species = "species.code",
                       unitobs = "observation.unit") {
  ## Purpose: Choose the finest aggregation factors available in the
  ##          observation table.
  ## ----------------------------------------------------------------------
  ## Arguments: obs : observation table
  ##            size_class : name of the size class column
  ##            code_species : name of the species code column
  ##            unitobs : name of the observation unit column
  ##
  ## Output: character vector of column names; the size class column is
  ##         included only when it exists and holds no NA.
  ## ----------------------------------------------------------------------
  base_factors <- c(unitobs, code_species)

  ## No size class column at all : aggregate by unit and species only.
  if (!any(size_class %in% colnames(obs))) {
    return(base_factors)
  }

  ## A size class column with at least one NA is not usable either.
  size_is_complete <- all(!is.na(obs[, size_class]))
  if (size_is_complete) c(base_factors, size_class) else base_factors
}
25
26 ######################################### end of the function fact_det_f
27
28 ######################################### start of the function def_typeobs_f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
29 ####### Define observation type from colnames
30
def_typeobs_f <- function(obs) {
  ## Purpose: Guess the observation type from the column names.
  ## ----------------------------------------------------------------------
  ## Arguments: obs : observation table
  ##
  ## Output: "SVR" when a rotation column ("rotation", "rot" or "rotate")
  ##         is present, "other" otherwise.
  ## ----------------------------------------------------------------------
  rotation_cols <- c("rotation", "rot", "rotate")
  found <- intersect(rotation_cols, colnames(obs))

  if (length(found) > 0) "SVR" else "other"
}
39 ######################################### end of the function def_typeobs_f
40
41 ######################################### start of the function create_unitobs called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
42 ####### Create the unitobs column when it does not exist
create_unitobs <- function(data, year = "year", location = "location", unitobs = "observation.unit") {
  ## Purpose: Make sure the table has an observation unit column.
  ## ----------------------------------------------------------------------
  ## Arguments: data : data table
  ##            year : name of the year column
  ##            location : name of the location column
  ##            unitobs : name of the observation unit column to look for
  ##
  ## Output: `data` unchanged when the `unitobs` column is already there;
  ##         otherwise the year and location columns are united (tidyr
  ##         default separator) into a column that is always named
  ##         "observation.unit".
  ## ----------------------------------------------------------------------
  already_there <- is.element(paste(unitobs), colnames(data))
  if (already_there) {
    return(data)
  }

  tidyr::unite(data, col = "observation.unit", c(year, location))
}
52 ######################################### end of the function create_unitobs
53
54 ######################################### start of the function create_year_location called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
55 ####### separate unitobs column when existant
create_year_location <- function(data, year = "year", location = "location", unitobs = "observation.unit") {
  ## Purpose: Split the observation unit column into year and location
  ##          columns when its values follow a recognised pattern.
  ## ----------------------------------------------------------------------
  ## Arguments: data : data table
  ##            year : name of the year column to create
  ##            location : name of the location column to create
  ##            unitobs : name of the observation unit column
  ##
  ## Output: data table with the year and location columns added when a
  ##         pattern matched, and with an "observation.unit" column holding
  ##         the original unit values.
  ## ----------------------------------------------------------------------
  unit_values <- data[, unitobs]

  if (all(grepl("[1-2][089][0-9]{2}_.*", unit_values))) {
    ## "<year>_<location>" : split on the first underscore only, so that a
    ## location which itself contains underscores is kept whole.
    tab <- tidyr::separate(data, col = !!unitobs, into = c(year, location),
                           sep = "_", extra = "merge")
  } else if (all(grepl("[A-Z]{2}[0-9]{2}.*", unit_values))) {
    ## "<2 letters><2 digits><rest>" : the digits are the year, the letters
    ## and the rest are glued back together as the location.
    tab <- tidyr::separate(data, col = !!unitobs, into = c("site1", year, "obs"), sep = c(2, 4))
    ## `!!` makes unite() use the value of `location` as the column name
    ## rather than the literal symbol, as in the first branch.
    tab <- tidyr::unite(tab, col = !!location, c("site1", "obs"))
  } else {
    ## Unknown pattern : leave the table as it is.
    tab <- data
  }

  ## separate() removed the unit column; put it back. Skip this when the
  ## column is still there, otherwise it would be duplicated.
  if (!is.element("observation.unit", colnames(tab))) {
    tab <- cbind(tab, observation.unit = unit_values)
  }

  tab
}
72 ######################################### end of the function create_year_location
73
74 ######################################### start of the function check_file called by every Galaxy Rscripts
75
check_file <- function(dataset, err_msg, vars, nb_vars) {
  ## Purpose: Check the integrity of an input table : minimum number of
  ##          columns and presence of the expected column names.
  ##          Stops with `err_msg` as soon as a mismatch is found.
  ## ----------------------------------------------------------------------
  ## Arguments: dataset : table to check
  ##            err_msg : error message raised on mismatch
  ##            vars : expected column names
  ##            nb_vars : minimum expected number of columns
  ## ----------------------------------------------------------------------
  ## Author: Alan Amosse, Benjamin Yguel

  ## Too few columns : trace it on stdout, then fail.
  too_few_columns <- ncol(dataset) < nb_vars
  if (too_few_columns) {
    cat("\nerr nb var\n")
    stop(err_msg, call. = FALSE)
  }

  ## Every expected name must be one of the column names.
  all_present <- all(vars %in% names(dataset))
  if (!all_present) {
    stop(err_msg, call. = FALSE)
  }

  invisible(NULL)
}
100
101 ######################################### end of the function check_file
102
103
104 ######################################### start of the function stat_rotations_nb_f called by calc_numbers_f
105
stat_rotations_nb_f <- function(factors, obs) {
  ## Purpose: Compute abundance statistics over rotations (mean, max, sd)
  ##          on SVR data.
  ## ----------------------------------------------------------------------
  ## Arguments: factors : names of the aggregation factors; must contain
  ##                      "observation.unit"
  ##            obs : observation data, read through its "observation.unit",
  ##                  "rotation" and "number" columns plus `factors`
  ##
  ## Output: list with nb_mean, nb_max, nb_sd (arrays over `factors`),
  ##         nombres_rotations (valid rotation count per observation unit)
  ##         and nombresTot (abundance per factors x rotation).
  ## ----------------------------------------------------------------------
  ## Author: Yves Reecht, Date: 29 oct. 2012, 16:01 modified by Coline ROYAUX 04 june 2020

  ## The table of valid rotations is indexed by observation unit, so the
  ## computation is meaningless without that factor. Fail with a clear
  ## message rather than with "object 'rotations' not found" further down.
  if (!is.element("observation.unit", factors)) {
    stop("'factors' must include \"observation.unit\" to compute rotation statistics",
         call. = FALSE)
  }

  ## NA when every value is missing, otherwise `stat` computed without NAs.
  na_or <- function(x, stat) {
    if (all(is.na(x))) NA else stat(x, na.rm = TRUE)
  }

  ## Valid rotations : every unit x rotation pair that has a row in `obs`.
  rotations <- tapply(obs$rotation,
                      as.list(obs[, c("observation.unit", "rotation"), drop = FALSE]),
                      function(x) length(x) > 0)

  ## Pairs that never occur are not valid rotations.
  rotations[is.na(rotations)] <- FALSE

  ## Abundance per rotation at the chosen aggregation factors.
  nombres_rot <- tapply(obs$number,
                        as.list(obs[, c(factors, "rotation"), drop = FALSE]),
                        function(x) na_or(x, sum))

  ## A missing abundance in a valid rotation is a true zero.
  rot_margin <- match(names(dimnames(rotations)), names(dimnames(nombres_rot)))
  nombres_rot <- sweep(nombres_rot,
                       rot_margin,
                       rotations, # logical table of valid rotations
                       function(x, y) {
                         x[is.na(x) & y] <- 0
                         x
                       })

  ## Statistics over rotations, keeping the aggregation factors.
  factor_margin <- which(is.element(names(dimnames(nombres_rot)), factors))
  over_rotations <- function(stat) {
    apply(nombres_rot, factor_margin, function(x) na_or(x, stat))
  }

  nb_mean <- over_rotations(mean)
  nb_max <- over_rotations(max)
  nb_sd <- over_rotations(sd)

  ## Number of valid rotations per observation unit.
  nombres_rotations <- apply(rotations, 1, sum, na.rm = TRUE)

  list(nb_mean = nb_mean, nb_max = nb_max, nb_sd = nb_sd,
       nombres_rotations = nombres_rotations, nombresTot = nombres_rot)
}
172
173 ######################################### end of the function stat_rotations_nb_f
174
175 ######################################### start of the function calc_nb_default_f called by calc_numbers_f
176
calc_nb_default_f <- function(obs,
                              factors = c("observation.unit", "species.code", "size.class"),
                              nb_name = "number") {
  ## Purpose : Compute abundances at the finest aggregation.
  ## ---------------------------------------------------------------------
  ## Arguments: obs : observation table
  ##            factors : aggregation factors (one or more column names)
  ##            nb_name : name of the abundance column
  ##
  ## Output: array with one dimension per factor; factor combinations with
  ##         no observation hold 0.
  ## ----------------------------------------------------------------------
  ## Author: Yves Reecht, Date: 19 déc. 2011, 13:38 modified by Coline ROYAUX 04 june 2020

  ## Sum the number of individuals. drop = FALSE keeps a one-column data
  ## frame when there is a single factor; otherwise as.list() would split
  ## the column into one list element per row and tapply() would fail.
  nbr <- tapply(obs[, nb_name],
                as.list(obs[, factors, drop = FALSE]),
                sum, na.rm = TRUE)

  ## Absences are "true zeros".
  nbr[is.na(nbr)] <- 0

  nbr
}
200
201 ######################################### end of the function calc_nb_default_f
202
203 ######################################### start of the function calc_numbers_f
204
calc_numbers_f <- function(obs, obs_type = "", factors = c("observation.unit", "species.code", "size.class"), nb_name = "number") {
  ## Purpose: Build the long-format abundance table from the abundance
  ##          array (calc_nb_default_f(), or rotation means for SVR data).
  ## ----------------------------------------------------------------------
  ## Arguments: obs : observation table
  ##            obs_type : type of observation ("SVR" is handled separately)
  ##            factors : aggregation factors
  ##            nb_name : name of the abundance column
  ##
  ## Output: data.frame with one column per aggregation factor plus the
  ##         abundance column (and number.max / number.sd for SVR).
  ## ----------------------------------------------------------------------
  ## Author: Yves Reecht, Date: 19 déc. 2011, 13:46 modified by Coline ROYAUX 04 june 2020

  is_svr <- obs_type == "SVR"

  if (is_svr) {
    ## Rotating videos : the abundance is the mean over rotations.
    stat_rotations <- stat_rotations_nb_f(factors = factors, obs = obs)
    counts <- stat_rotations[["nb_mean"]]
  } else {
    counts <- calc_nb_default_f(obs, factors, nb_name)
  }

  ## Array -> long table, the abundance column named after nb_name.
  res <- as.data.frame(as.table(counts), responseName = nb_name)

  ## An empty size class label means "no size class".
  if ("size.class" %in% colnames(res)) {
    res$size.class[res$size.class == ""] <- NA
  }

  ## Store whole-number abundances as integers.
  as_int <- as.integer(res[, nb_name])
  if (isTRUE(all.equal(res[, nb_name], as_int))) {
    res[, nb_name] <- as_int
  }

  ## SVR only : add the max and sd over rotations.
  if (is_svr) {
    res[, "number.max"] <- as.vector(stat_rotations[["nb_max"]])
    res[, "number.sd"] <- as.vector(stat_rotations[["nb_sd"]])
  }

  res
}
250
251 ######################################### end of the function calc_numbers_f
252
253 ######################################### start of the function pres_abs_f called by calc_biodiv_f
254
pres_abs_f <- function(nombres, logical = FALSE) {
  ## Purpose: Turn abundances into presence / absence.
  ## ----------------------------------------------------------------------
  ## Arguments: nombres : vector of individual counts
  ##            logical : (boolean) return TRUE/FALSE instead of 1/0 ?
  ##
  ## Output: vector of the same length as `nombres`; NA stays NA.
  ##         Stops when a negative count is found.
  ## ----------------------------------------------------------------------
  ## Author: Yves Reecht, Date: 29 oct. 2010, 10:20 modified by Coline ROYAUX 04 june 2020

  has_negative <- any(nombres < 0, na.rm = TRUE)
  if (has_negative) {
    stop("Negative abundances!")
  }

  is_present <- nombres > 0
  if (logical) {
    return(is_present)
  }

  ## Zeros and NAs are left untouched, every positive count becomes 1.
  replace(nombres, is_present, 1)
}
274
275 ######################################### end of the function pres_abs_f
276
277 ######################################### start of the function bettercbind called by agregations_generic_f
278
bettercbind <- function(..., df_list = NULL, deparse.level = 1) {
  ## Purpose: cbind() data frames that share some columns, without
  ##          repeating the shared columns.
  ## ----------------------------------------------------------------------
  ## Arguments: ... : data frames (used when df_list is NULL)
  ##            df_list : list of data frames
  ##            deparse.level : passed on to cbind()
  ##
  ## Output: the first data frame with its first column moved to the end
  ##         of its own columns, followed by the columns of the other data
  ##         frames that are not already in the first one.
  ## ----------------------------------------------------------------------
  ## Author: Yves Reecht, Date: 17 janv. 2012, 21:10 modified by Coline ROYAUX 04 june 2020

  if (is.null(df_list)) {
    df_list <- list(...)
  }

  reference <- df_list[[1]]
  reference_cols <- colnames(reference)

  ## First data frame : its first column goes last.
  rotated <- reference[, c(tail(reference_cols, -1), head(reference_cols, 1))]

  ## Other data frames : keep only the columns the first one does not have.
  extras <- lapply(df_list[-1], function(x) {
    x[, !is.element(colnames(x), reference_cols), drop = FALSE]
  })

  do.call(cbind, c(list(rotated), extras, deparse.level = deparse.level))
}
304
305 ######################################### end of the function bettercbind
306
307 ######################################### start of the function agregation_f called by agregations_generic_f
308
agregation_f <- function(metric, d_ata, factors, cas_metric,
                         nb_name = "number") {
  ## Purpose: Aggregate one metric over the aggregation factors.
  ## ----------------------------------------------------------------------
  ## Arguments: metric : column name of the chosen metric
  ##            d_ata : unaggregated data table
  ##            factors : vector of aggregation factors
  ##            cas_metric : named vector giving, for each metric name, the
  ##                         aggregation case to apply
  ##            nb_name : abundance column name
  ##
  ## Output: long-format data.frame, metric column first, then one column
  ##         per aggregation factor.
  ##
  ## Note: the "nbMax", "nbSD", "densMax" and "densSD" cases read `nb_tmp`
  ##       / `dens_tmp`, which are neither arguments nor locals; they must
  ##       exist in an enclosing environment when those cases are used.
  ## ----------------------------------------------------------------------
  ## Author: Yves Reecht, Date: 20 déc. 2011, 14:29 modified by Coline ROYAUX 04 june 2020

  ## Grouping index, built on demand (the rotation cases do not use it).
  group_index <- function() {
    as.list(d_ata[, factors, drop = FALSE])
  }

  ## NA when every value is missing, otherwise `stat` computed without NAs.
  na_or <- function(x, stat) {
    if (all(is.na(x))) NA else stat(x, na.rm = TRUE)
  }

  ## Apply `fun` to the metric values of each group.
  by_value <- function(fun) {
    tapply(d_ata[, metric], group_index(), fun)
  }

  ## Apply `fun` to the row indices of each group.
  by_row <- function(fun) {
    tapply(seq_len(nrow(d_ata)), group_index(), fun)
  }

  ## Mean of the metric weighted by the column `weight_col`.
  weighted_by <- function(weight_col) {
    by_row(function(ii) {
      if (all(is.na(d_ata[ii, metric]))) {
        NA
      } else {
        weighted.mean(d_ata[ii, metric],
                      d_ata[ii, weight_col],
                      na.rm = TRUE)
      }
    })
  }

  ## Percentage of `part_col` (rows with a non-NA metric) in `total_col`.
  proportion_of <- function(part_col, total_col) {
    by_row(function(ii) {
      if (all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, total_col], na.rm = TRUE) == 0) {
        NA
      } else if (all(na.omit(d_ata[ii, metric]) == 0)) {
        0
      } else {
        ## Correction if size class isn't an aggregation factor
        ## (otherwise value divided by number of present classes) :
        percent <- if (is.element("size.class", factors)) {
          100
        } else {
          100 * length(unique(d_ata$size.class))
        }
        (sum(d_ata[ii, part_col][!is.na(d_ata[ii, metric])], na.rm = TRUE) /
           sum(d_ata[ii, total_col], na.rm = TRUE)) * percent
      }
    })
  }

  ## Sum `arr` by factors x rotation, then apply `stat` across rotations.
  ## Both margins are located through the dimension names of `arr`.
  rotation_stat <- function(arr, stat) {
    per_rotation <- apply(arr,
                          which(is.element(names(dimnames(arr)), c(factors, "rotation"))),
                          function(x) na_or(x, sum))

    as.array(apply(per_rotation,
                   which(is.element(names(dimnames(arr)), factors)),
                   function(x) na_or(x, stat)))
  }

  switch(cas_metric[metric],
         "sum" = {
           res <- by_value(function(x) na_or(x, sum))
         },
         "w.mean" = {
           res <- weighted_by(nb_name)
         },
         "w.mean.colonies" = {
           res <- weighted_by("colonies")
         },
         "w.mean.prop" = {
           res <- proportion_of(nb_name, "nombre.tot")
         },
         "w.mean.prop.bio" = {
           res <- proportion_of("biomass", "tot.biomass")
         },
         "pres" = {
           res <- by_value(function(x) {
             if (all(is.na(x))) {
               NA # only NAs in the group
             } else if (any(x > 0, na.rm = TRUE)) {
               1 # presence if at least one observation in the group
             } else {
               0
             }
           })
         },
         "nbMax" = {
           res <- rotation_stat(nb_tmp, max)
         },
         "nbSD" = {
           res <- rotation_stat(nb_tmp, sd)
         },
         "densMax" = {
           res <- rotation_stat(dens_tmp, max)
         },
         "densSD" = {
           res <- rotation_stat(dens_tmp, sd)
         },
         "%.nesting" = {
           res <- weighted_by("readable.tracks")
         },
         stop("Not implemented!")
  )

  ## Dimension names
  names(dimnames(res)) <- c(factors)

  ## Long format, then move the metric column (last) to the front.
  reslong <- as.data.frame(as.table(res), responseName = metric)
  long_cols <- colnames(reslong)

  reslong[, c(tail(long_cols, 1), head(long_cols, -1))]
}
488
489 ######################################### end of the function agregation_f
490
491 ######################################### start of the function agregations_generic_f called by calc_biodiv_f in FunctExeCalcCommIndexesGalaxy.r
492
agregations_generic_f <- function(d_ata, metrics, factors, list_fact = NULL, unit_sp_sz = NULL, unit_sp = NULL,
                                  nb_name = "number") {
  ## Purpose: Aggregate several metrics over the same factors and return
  ##          them side by side.
  ## ----------------------------------------------------------------------
  ## Arguments: d_ata : data set
  ##            metrics : names of the metrics to aggregate
  ##            factors : aggregation factors
  ##            list_fact : other factors to aggregate and add to output
  ##            unit_sp_sz : metrics table by unitobs/species/size class
  ##            unit_sp : metrics table by unitobs/species
  ##            nb_name : abundance column name
  ##
  ## Output : aggregated data frame, or NULL (with a warning) when one of
  ##          the additional factors holds NULL elements.
  ## ----------------------------------------------------------------------
  ## Author: Yves Reecht, Date: 18 oct. 2010, 15:47 modified by Coline ROYAUX 04 june 2020

  ## Aggregation case for each known metric :
  cas_metric <- c("number" = "sum",
                  "mean.length" = "w.mean",
                  "taille_moy" = "w.mean",
                  "biomass" = "sum",
                  "Biomass" = "sum",
                  "weight" = "sum",
                  "mean.weight" = "w.mean",
                  "density" = "sum",
                  "Density" = "sum",
                  "CPUE" = "sum",
                  "CPUE.biomass" = "sum",
                  "presence_absence" = "pres",
                  "abundance.prop.SC" = "w.mean.prop", # Not OK [!!!] ?
                  "biomass.prop.SC" = "w.mean.prop.bio", # Not OK [!!!] ?
                  ## Benthos :
                  "colonies" = "sum",
                  "coverage" = "sum",
                  "mean.size.colonies" = "w.mean.colonies",
                  ## SVR (experimental) :
                  "number.max" = "nbMax",
                  "number.sd" = "nbSD",
                  "density.max" = "densMax",
                  "density.sd" = "densSD",
                  "biomass.max" = "sum",
                  "spawning.success" = "%.nesting",
                  "spawnings" = "sum",
                  "readable.tracks" = "sum",
                  "tracks.number" = "sum")

  ## Cases of the requested metrics, and the merge keys used below.
  metric_cases <- cas_metric[metrics]
  key_sp <- c("species.code", "observation.unit")
  key_sp_sz <- c(key_sp, "size.class")

  ## Add "readable.tracks" for the egg laying percentage :
  if (any(metric_cases == "%.nesting")) {
    if (is.element("size.class", colnames(d_ata))) {
      if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini")

      d_ata <- merge(d_ata,
                     unit_sp_sz[, c(key_sp_sz, "readable.tracks")],
                     by = key_sp_sz,
                     suffixes = c("", ".y"))
    } else {
      if (is.null(unit_sp)) stop("unit_sp must be defined")

      d_ata <- merge(d_ata,
                     unit_sp[, c(key_sp, "readable.tracks")],
                     by = key_sp,
                     suffixes = c("", ".y"))
    }
  }

  ## Add the abundance field needed by the weighted means :
  if (any(metric_cases == "w.mean" | metric_cases == "w.mean.prop")) {
    if (is.element("size.class", colnames(d_ata))) {
      if (is.null(unit_sp_sz)) stop("unit_sp_sz must be defined")

      d_ata <- merge(d_ata,
                     unit_sp_sz[, c(key_sp_sz, nb_name)],
                     by = key_sp_sz,
                     suffixes = c("", ".y"))

      ## Total abundance / species / observation unit :
      nb_tot <- tapply(unit_sp_sz[, nb_name],
                       as.list(unit_sp_sz[, key_sp]),
                       sum, na.rm = TRUE)

      d_ata <- merge(d_ata,
                     as.data.frame(as.table(nb_tot), responseName = "nombre.tot"))
    } else {
      if (is.null(unit_sp)) stop("unit_sp must be defined")

      d_ata <- merge(d_ata,
                     unit_sp[, c(key_sp, nb_name)], # [!!!] unit_sp_sz ?
                     by = key_sp,
                     suffixes = c("", ".y"))
    }
  }

  ## Add the biomass field for the biomass proportion by size class :
  if (any(metric_cases == "w.mean.prop.bio")) {
    if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini")

    d_ata <- merge(d_ata,
                   unit_sp_sz[, c(key_sp_sz, "biomass")],
                   by = key_sp_sz,
                   suffixes = c("", ".y"))

    ## Total biomass / species / observation unit :
    biom_tot <- tapply(unit_sp_sz$biomass,
                       as.list(unit_sp_sz[, key_sp]),
                       function(x) {
                         if (all(is.na(x))) NA else sum(x, na.rm = TRUE)
                       })

    d_ata <- merge(d_ata,
                   as.data.frame(as.table(biom_tot), responseName = "tot.biomass"))
  }

  ## Add the colonies field for the weighted means if absent :
  if (any(metric_cases == "w.mean.colonies" & ! is.element("colonies", colnames(d_ata)))) {
    d_ata$colonies <- unit_sp[match(apply(d_ata[, key_sp],
                                          1, paste, collapse = "*"),
                                    apply(unit_sp[, key_sp],
                                          1, paste, collapse = "*")), "colonies"]
  }

  ## Aggregate each metric over the factors and bind the results :
  reslong <- bettercbind(df_list = lapply(metrics,
                                          agregation_f,
                                          d_ata = d_ata, factors = factors, cas_metric = cas_metric,
                                          nb_name = nb_name))

  ## Aggregate and add the other factors :
  if (! (is.null(list_fact) || length(list_fact) == 0)) {
    reslong <- cbind(reslong,
                     sapply(d_ata[, list_fact, drop = FALSE],
                            function(fact) {
                              tapply(fact,
                                     as.list(d_ata[, factors, drop = FALSE]),
                                     function(x) {
                                       if (length(x) > 1 && length(unique(x)) > 1) { # must be one modality
                                         return(NULL) # otherwise it is NULL
                                       } else {
                                         unique(as.character(x))
                                       }
                                     })
                            }))
  }

  ## Give the additional factors back the class they have in d_ata :
  source_classes <- sapply(d_ata[, list_fact, drop = FALSE], class)
  if (any(tmp <- sapply(reslong[, list_fact, drop = FALSE], class) != source_classes)) {
    for (i in which(tmp)) {
      switch(source_classes[i],
             "integer" = {
               reslong[, list_fact[i]] <- as.integer(as.character(reslong[, list_fact[i]]))
             },
             "numeric" = {
               reslong[, list_fact[i]] <- as.numeric(as.character(reslong[, list_fact[i]]))
             },
             reslong[, list_fact[i]] <- do.call(paste("as", source_classes[i], sep = "."),
                                                list(reslong[, list_fact[i]]))
      )
    }
  }

  ## Restore the initial order of the factor levels. This is only possible
  ## when the source column is itself a factor : for any other column
  ## levels() is NULL, and factor(x, levels = NULL) would turn every value
  ## into NA.
  reslong <- as.data.frame(sapply(colnames(reslong),
                                  function(x) {
                                    if (is.factor(reslong[, x]) &&
                                        is.element(x, colnames(d_ata)) &&
                                        is.factor(d_ata[, x])) {
                                      factor(reslong[, x], levels = levels(d_ata[, x]))
                                    } else {
                                      reslong[, x]
                                    }
                                  }, simplify = FALSE))

  ## Check the additional factors : there must be no NULL element.
  if (any(sapply(reslong[, list_fact], function(x) {
    any(is.null(unlist(x)))
  }))) {
    warning(paste("One of the suppl. factors is probably a subset",
                  " of the observations grouping factor(s).", sep = ""))
    return(NULL)
  }

  reslong
}
677
678 ######################################### end of the function agregations_generic_f
679
680 ######################################### start of the function drop_levels_f called by calc_biodiv_f in FunctExeCalcCommIndexesGalaxy.r and glm_community in FunctExeCalcGLMGalaxy.r
drop_levels_f <- function(df, which = NULL) {
  ## Purpose: Remove the unused levels of the factors of a data.frame.
  ## ----------------------------------------------------------------------
  ## Arguments: df : a data.frame
  ##            which : index of the columns to process (all by default)
  ##
  ## Output: data.frame with unused factor levels dropped in the processed
  ##         columns. Stops when `df` is not a data.frame.
  ## ----------------------------------------------------------------------
  ## Author: Yves Reecht, Date: 10 août 2010, 13:29 modified by Coline ROYAUX 04 june 2020

  ## inherits() also accepts data.frame subclasses, whose class vector has
  ## several elements and cannot be compared with `!=` inside an if().
  if (!inherits(df, "data.frame")) {
    stop("'df' must be a data.frame")
  }

  ## Rebuild a data.frame from columns with unused levels dropped.
  drop_unused <- function(cols) {
    as.data.frame(sapply(cols, function(x) {
      x[, drop = TRUE]
    }, simplify = FALSE),
    stringsAsFactors = FALSE)
  }

  if (is.null(which)) {
    return(drop_unused(df))
  }

  ## Only some columns are processed, the others are kept as they are.
  x <- df
  x[, which] <- drop_unused(df[, which, drop = FALSE])
  x
}
710 ######################################### end of the function drop_levels_f
711
712 ######################################### start of the function subset_all_tables_f called by glm_community in FunctExeCalcGLMGalaxy.r
713
subset_all_tables_f <- function(metrique, tab_metrics, facteurs, selections,
                                tab_unitobs, refesp, tab_metrique = "", nb_name = "number", obs_type = "",
                                exclude = NULL, add = c("species.code", "observation.unit")) {
  ## Purpose: Extract only the useful data for the chosen metric and
  ##          factors.
  ## ----------------------------------------------------------------------
  ## Arguments: metrique : chosen metric ("" or "occurrence.frequency"
  ##                       builds a 0/1 "tmp" metric from the abundances)
  ##            tab_metrics : metrics table
  ##            facteurs : all chosen factors
  ##            selections : kept for interface compatibility, not used
  ##            tab_unitobs : observation units table
  ##            refesp : species reference table
  ##            tab_metrique : metrics table name
  ##            nb_name : abundance column name
  ##            obs_type : observation type ("fishing" skips the /100m²
  ##                       conversion)
  ##            exclude : kept for interface compatibility, not used
  ##            add : fields to add to the data table
  ##
  ## Output: data.frame with the metric, the `add` fields present in the
  ##         metrics table and the chosen factors, restricted to the rows
  ##         where the metric is not NA.
  ## ----------------------------------------------------------------------
  ## Author: Yves Reecht, Date: 6 août 2010, 16:46 modified by Coline ROYAUX 04 june 2020

  ## If no metrics table available :
  if (is.element(tab_metrique, c("", "TableOccurrences", "TablePresAbs"))) {
    tab_metrique <- "unit_sp"
  }

  cas_tables <- c("unit_sp" = "unit_sp",
                  "TablePresAbs" = "unit_sp",
                  "unit_sp_sz" = "unit_sp_sz")

  data_metric <- tab_metrics
  unitobs <- tab_unitobs

  ## If no metric available or already computed : presence / absence.
  if (is.element(metrique, c("", "occurrence.frequency"))) {
    metrique <- "tmp"
    data_metric$tmp <- 0
    data_metric$tmp[data_metric[, nb_name] > 0] <- 1
  }

  if (!is.null(add)) {
    metriques <- c(metrique, add[is.element(add, colnames(data_metric))])
  } else {
    metriques <- metrique
  }

  ## Rows where the metric is known.
  keep <- !is.na(data_metric[, metrique])

  ## Chosen factors found in the observation units table, one row per
  ## kept row of the metrics table.
  unitobs_part <- function() {
    unitobs[match(data_metric$observation.unit[keep],
                  unitobs$observation.unit),
            facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE]
  }

  ## Chosen factors found in the species reference table.
  refesp_part <- function() {
    refesp[match(data_metric$species.code[keep],
                 refesp$species.code),
           facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE]
  }

  ## Subset depending on the metrics table :
  switch(cas_tables[tab_metrique],
         ## Observation table by unitobs and species :
         unit_sp = {
           restmp <- cbind(data_metric[keep, metriques, drop = FALSE],
                           unitobs_part(),
                           refesp_part())
         },
         ## Observation table by unitobs, species and size class :
         unit_sp_sz = {
           restmp <- cbind(data_metric[keep, c(metriques, "size.class"), drop = FALSE],
                           unitobs_part(),
                           refesp_part())
         },
         ## Other cases :
         restmp <- cbind(data_metric[keep, metriques, drop = FALSE],
                         unitobs_part())
  )

  ## Particular case of size classes : when every class looks like
  ## "<digits>-<digits>" (or "_"), order the levels by their lower bound.
  if (is.element("size.class", colnames(restmp))) {
    classes <- unique(as.character(restmp$size.class))
    range_like <- grep("^[[:digit:]]*[-_][[:digit:]]*$", classes, perl = TRUE)

    if (length(range_like) == length(classes)) {
      lower_bound <- as.numeric(sub("^([[:digit:]]*)[-_][[:digit:]]*$",
                                    "\\1",
                                    classes,
                                    perl = TRUE))
      restmp[, "size.class"] <-
        factor(as.character(restmp$size.class),
               levels = classes[order(lower_bound, na.last = FALSE)])
    } else {
      restmp[, "size.class"] <- factor(restmp$size.class)
    }
  }

  ## Biomass and density conversion -> /100m² :
  per_area <- is.element(colnames(restmp),
                         c("biomass", "density",
                           "biomass.max", "density.max",
                           "biomass.sd", "density.sd"))
  if (any(per_area) && obs_type != "fishing") {
    restmp[per_area] <- 100 * restmp[per_area]
  }

  restmp
}
823
824 ######################################### end of the function subset_all_tables_f
825
826 ######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r
827
organise_fact <- function(list_rand, list_fact) {
  ## Purpose: Build the explanatory side of the model from the fixed and
  ##          random factors.
  ## ----------------------------------------------------------------------
  ## Arguments: list_rand : analysis random factors ("None" when there are
  ##                        none)
  ##            list_fact : analysis factors
  ##
  ## Output: unnamed list of three elements : the explanatory terms as one
  ##         string, the fixed factors (NULL when every factor is random)
  ##         and all the factors, fixed ones first.
  ## ----------------------------------------------------------------------
  ## Author: Coline ROYAUX 14 november 2020

  ## No random effect : every factor is a fixed one.
  if (list_rand[1] == "None") {
    return(list(paste(list_fact, collapse = " + "), list_fact, list_fact))
  }

  random_terms <- paste(list_rand, collapse = ") + (1|")

  ## Random effects only.
  only_random <- all(is.element(list_fact, list_rand)) || list_fact[1] == "None"
  if (only_random) {
    return(list(paste("(1|", random_terms, ")"), NULL, list_rand))
  }

  ## Fixed effects first, then the random ones.
  fixed <- list_fact[!is.element(list_fact, list_rand)]
  terms <- paste(paste(fixed, collapse = " + "), " + (1|", random_terms, ")")

  list(terms, fixed, c(fixed, list_rand))
}
852
853 ######################################### end of the function organise_fact
854
855 ######################################### start of the function distrib_choice called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r
distrib_choice <- function(distrib = distrib, metrique = metrique, data = tmpd_ata) {
  ## Purpose: Choose the distribution law of the model.
  ## ----------------------------------------------------------------------
  ## Arguments: distrib : distribution law selected by the user, "None" to
  ##                      let the function choose
  ##            metrique : chosen metric
  ##            data : data used for the analysis
  ##
  ## Output: `distrib` when it is not "None"; otherwise "binomial" for
  ##         "presence_absence", "poisson" for an integer metric and
  ##         "gaussian" for a numeric one. Stops for any other class.
  ##
  ## Note: the defaults of `distrib` and `metrique` refer to themselves, so
  ##       both must be supplied by the caller; the default of `data`
  ##       relies on a `tmpd_ata` object visible from this function.
  ## ----------------------------------------------------------------------
  ## Author: Coline ROYAUX 14 november 2020

  ## The user made a choice : keep it.
  if (distrib != "None") {
    return(distrib)
  }

  if (metrique == "presence_absence") {
    return("binomial")
  }

  ## Compare the whole class vector : a column with several classes (an
  ## ordered factor for instance) must reach the error below, which
  ## switch() cannot do with an EXPR of length > 1.
  metric_class <- class(data[, metrique])

  if (identical(metric_class, "integer")) {
    "poisson"
  } else if (identical(metric_class, "numeric")) {
    "gaussian"
  } else {
    stop("Selected metric class doesn't fit, you should select an integer or a numeric variable")
  }
}
883
884 ######################################### end of the function distrib_choice
885
886 ######################################### start of the function create_res_table called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r
create_res_table <- function(list_rand, list_fact, row, lev, distrib) {
    ## Purpose: create the empty results table filled in later, one row per
    ##          analysis and one column per reported quantity
    ## ----------------------------------------------------------------------
    ## Arguments: list_rand : Analysis random factors list ("None" as first
    ##                        element when there is no random effect)
    ##            list_fact : Analysis factors list (not used in the body)
    ##            row : rows of results table = species or separation factor
    ##            lev : Levels of analysis factors list (NULL if none)
    ##            distrib : distribution law (only read when there is no
    ##                      random effect)
    ## ----------------------------------------------------------------------
    ## Value: data.frame with the model summary columns followed by
    ##        "<term> <statistic>" columns, all set to NA except `analysis`
    ## ----------------------------------------------------------------------
    ## Author: Coline ROYAUX 04 october 2020

    ## "<term> <statistic>" for every pair, terms varying slowest
    cross_names <- function(terms, stats) {
        unlist(lapply(terms, FUN = function(term) {
            paste(term, stats)
        }))
    }

    ## Statistics reported for each coefficient; only the test statistic
    ## name depends on the model
    coef_stats <- function(test_stat) {
        c("Estimate", "Std.Err", test_stat, "Pvalue", "IC_up", "IC_inf", "signif")
    }

    if (list_rand[1] != "None") { ## random effects
        tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, BIC = NA, logLik = NA, deviance = NA, df.resid = NA)
        tab_sum[, cross_names(list_rand, c("Std.Dev", "NbObservation", "NbLevels"))] <- NA

        ## coefficient columns only when there are fixed effects too
        colcoef <- if (is.null(lev)) {
            NULL
        } else {
            cross_names(c("(Intercept)", lev), coef_stats("Zvalue"))
        }
    } else { ## fixed effects only
        tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, Resid.deviance = NA, df.resid = NA, Null.deviance = NA, df.null = NA)

        ## gaussian and quasipoisson report a t statistic, every other
        ## distribution a z statistic
        test_stat <- switch(distrib,
                            "gaussian" = ,
                            "quasipoisson" = "Tvalue",
                            "Zvalue")
        colcoef <- cross_names(c("(Intercept)", lev), coef_stats(test_stat))
    }

    tab_sum[, colcoef] <- NA

    return(tab_sum)
}
963 ######################################### end of the function create_res_table
964
965 ######################################### start of the function sorties_lm_f called by glm_community in FunctExeCalcGLMGalaxy.r
sorties_lm_f <- function(obj_lm, obj_lmy, tab_sum,
                         metrique, fact_ana, cut, col_ana, list_fact, list_rand, lev = NULL, d_ata,
                         log = FALSE, sufixe = NULL) {
    ## Purpose: Form GLM and LM results: copy the statistics of one fitted
    ##          model into the matching row of the summary table
    ## ----------------------------------------------------------------------
    ## Arguments: obj_lm : fitted model (glm, or glmmTMB when random effects)
    ##            obj_lmy : same model with year as continuous, "" if none
    ##            tab_sum : output summary table
    ##            metrique : Chosen metric
    ##            fact_ana : separation factor (not used in the body)
    ##            cut : level of separation factor = row to fill
    ##            col_ana : colname for separation factor in output summary table
    ##            list_fact : Analysis factors list (not used in the body)
    ##            list_rand : Analysis random factors list
    ##            lev : Levels of analysis factors list; each one is used as a
    ##                  regular expression on the coefficient names
    ##            d_ata : d_ata used for analysis
    ##            log : put log on metric ? (boolean, not used in the body)
    ##            sufixe : sufix for file name (not used in the body)
    ## ----------------------------------------------------------------------
    ## Value: tab_sum with the row(s) where tab_sum[, col_ana] == cut filled
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 25 août 2010, 16:19 modified by Coline ROYAUX 04 june 2020

    ## ---- local helpers ---------------------------------------------------

    ## "yes"/"no" flag for each p-value (significant below 0.05, NA is "no")
    flag_signif <- function(p_values) {
        lapply(p_values, FUN = function(x) {
            if (!is.na(x) && x < 0.05) {
                "yes"
            } else {
                "no"
            }
        })
    }

    ## Value of `column` for every level of lev, NA for levels absent from
    ## the row names of `tab` (also when `tab` is NULL)
    values_by_level <- function(tab, column) {
        unlist(lapply(lev, FUN = function(x) {
            if (length(grep(x, rownames(tab))) > 0) {
                tab[grepl(x, rownames(tab)), column]
            } else {
                NA
            }
        }))
    }

    ## Columns of tab_sum named "<level> <suffix>" / "(Intercept) <suffix>"
    level_cols <- function(suffix) {
        grepl(paste(lev, suffix, collapse = "|"), colnames(tab_sum))
    }
    intercept_cols <- function(suffix) {
        grepl(paste0("Intercept.*", suffix), colnames(tab_sum))
    }

    intercept_value <- function(tab, column) {
        tab[grepl("Intercept", rownames(tab)), column]
    }

    ## Value of `column` for the continuous year coefficient
    year_value <- function(tab, column) {
        ifelse(length(tab["year", column]) > 0, tab["year", column], NA)
    }

    ## ---- general information ---------------------------------------------

    tab_sum[, "Interest.var"] <- as.character(metrique)
    sum_lm <- summary(obj_lm)
    tab_sum[, "distribution"] <- as.character(sum_lm$family[1])

    ## Row(s) of the summary table describing the current analysis
    row_sel <- tab_sum[, col_ana] == cut

    if (length(grep("^glmmTMB", obj_lm$call)) > 0) { ## if random effects
        tab_sum[row_sel, "AIC"] <- sum_lm$AICtab[1]
        tab_sum[row_sel, "BIC"] <- sum_lm$AICtab[2]
        tab_sum[row_sel, "logLik"] <- sum_lm$AICtab[3]
        tab_sum[row_sel, "deviance"] <- sum_lm$AICtab[4]
        tab_sum[row_sel, "df.resid"] <- sum_lm$AICtab[5]

        if (!is.null(lev)) { ## if fixed effects + random effects
            tab_coef <- as.data.frame(sum_lm$coefficients$cond)
            tab_coef$signif <- flag_signif(tab_coef[, "Pr(>|z|)"])

            tab_sum[row_sel, intercept_cols("Zvalue")] <- intercept_value(tab_coef, "z value")
            tab_sum[row_sel, intercept_cols("Pvalue")] <- intercept_value(tab_coef, "Pr(>|z|)")

            tab_sum[row_sel, level_cols("Zvalue")] <- values_by_level(tab_coef, "z value")
            tab_sum[row_sel, level_cols("Pvalue")] <- values_by_level(tab_coef, "Pr(>|z|)")

            if (any(obj_lmy != "")) {
                sum_lmy <- summary(obj_lmy)
                tab_coefy <- as.data.frame(sum_lmy$coefficients$cond)
                tab_coefy$signif <- flag_signif(tab_coefy[, "Pr(>|z|)"])
                tab_sum[row_sel, "year Zvalue"] <- year_value(tab_coefy, "z value")
                tab_sum[row_sel, "year Pvalue"] <- year_value(tab_coefy, "Pr(>|z|)")
            }
        }

        ## Only one or two random effects are handled.
        ## NOTE(review): the elements of varcor$cond are stored as they are
        ## in the "Std.Dev" columns; confirm against the glmmTMB
        ## documentation that they are standard deviations and not variances.
        switch(as.character(length(sum_lm$varcor$cond)),
               "1" = {
                   std_d <- c(sum_lm$varcor$cond[[1]])
               },
               "2" = {
                   std_d <- c(sum_lm$varcor$cond[[1]], sum_lm$varcor$cond[[2]])
               },
               std_d <- NULL)

        tab_sum[row_sel, grepl(paste(list_rand, "Std.Dev", collapse = "|"), colnames(tab_sum))] <- std_d
        tab_sum[row_sel, grepl(paste(list_rand, "NbObservation", collapse = "|"), colnames(tab_sum))] <- sum_lm$nobs
        tab_sum[row_sel, grepl(paste(list_rand, "NbLevels", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(list_rand, FUN = function(x) {
            nlevels(d_ata[, x])
        }))

    } else { ## if fixed effects only

        tab_sum[row_sel, "AIC"] <- sum_lm$aic
        tab_sum[row_sel, "Resid.deviance"] <- sum_lm$deviance
        tab_sum[row_sel, "df.resid"] <- sum_lm$df.residual
        tab_sum[row_sel, "Null.deviance"] <- sum_lm$null.deviance
        tab_sum[row_sel, "df.null"] <- sum_lm$df.null
        tab_coef <- as.data.frame(sum_lm$coefficients)

        if (any(obj_lmy != "")) {
            sum_lmy <- summary(obj_lmy)
            tab_coefy <- as.data.frame(sum_lmy$coefficients)
        }

        ## gaussian and quasipoisson summaries report t tests, the other
        ## families z tests: pick the matching coefficient-table columns
        if (sum_lm$family[1] == "gaussian" || sum_lm$family[1] == "quasipoisson") {
            stat_col <- "t value"
            p_col <- "Pr(>|t|)"
            stat_suffix <- "Tvalue"
        } else {
            stat_col <- "z value"
            p_col <- "Pr(>|z|)"
            stat_suffix <- "Zvalue"
        }

        tab_coef$signif <- flag_signif(tab_coef[, p_col])

        tab_sum[row_sel, intercept_cols(stat_suffix)] <- intercept_value(tab_coef, stat_col)
        tab_sum[row_sel, intercept_cols("Pvalue")] <- intercept_value(tab_coef, p_col)

        tab_sum[row_sel, level_cols(stat_suffix)] <- values_by_level(tab_coef, stat_col)
        tab_sum[row_sel, level_cols("Pvalue")] <- values_by_level(tab_coef, p_col)

        if (any(obj_lmy != "")) {
            tab_coefy$signif <- flag_signif(tab_coefy[, p_col])
            tab_sum[row_sel, paste("year", stat_suffix)] <- year_value(tab_coefy, stat_col)
            ## The p-value is read from the same column for the test and for
            ## the value: the t-table has no "Pr(>|z|)" column.
            tab_sum[row_sel, "year Pvalue"] <- year_value(tab_coefy, p_col)
        }
    }

    if (!is.null(lev)) { ## if fixed effects
        tab_sum[row_sel, intercept_cols("Estimate")] <- intercept_value(tab_coef, "Estimate")
        tab_sum[row_sel, intercept_cols("Std.Err")] <- intercept_value(tab_coef, "Std. Error")
        tab_sum[row_sel, intercept_cols("signif")] <- intercept_value(tab_coef, "signif")

        tab_sum[row_sel, level_cols("Estimate")] <- values_by_level(tab_coef, "Estimate")
        tab_sum[row_sel, level_cols("Std.Err")] <- values_by_level(tab_coef, "Std. Error")
        tab_sum[row_sel, level_cols("signif")] <- values_by_level(tab_coef, "signif")

        if (any(obj_lmy != "")) {
            tab_sum[row_sel, "year Estimate"] <- year_value(tab_coefy, "Estimate")
            tab_sum[row_sel, "year Std.Err"] <- year_value(tab_coefy, "Std. Error")
            tab_sum[row_sel, "year signif"] <- year_value(tab_coefy, "signif")
        }
    }

    ## Confidence intervals: when confint() fails, ic is NULL and the
    ## interval columns are filled with NA
    ic <- tryCatch(as.data.frame(confint(obj_lm)), error = function(e) NULL)

    tab_sum[row_sel, level_cols("IC_up")] <- values_by_level(ic, "97.5 %")
    tab_sum[row_sel, level_cols("IC_inf")] <- values_by_level(ic, "2.5 %")

    return(tab_sum)
}
1210
1211
1212 ######################################### end of the function sorties_lm_f
1213
1214
1215 ######################################### start of the function note_glm_f called by glm_species and glm_community
1216
note_glm_f <- function(data, obj_lm, metric, list_fact, details = FALSE) {
    ## Purpose: Note your GLM analysis
    ## ----------------------------------------------------------------------
    ## Arguments: data : d_ataframe used for analysis
    ##            obj_lm : GLM assessed
    ##            metric : selected metric (not used in the body)
    ##            list_fact : Analysis factors list
    ##            details : detailed output ?
    ## ----------------------------------------------------------------------
    ## Value: NA for models with a quasi distribution (a message is printed);
    ##        otherwise the rate (numeric), or when details is TRUE a list
    ##        with one TRUE/FALSE/NA entry per criterion plus the rate.
    ##        Points: complete plan 0.5, balanced plan 0.5, few NA 1,
    ##        dispersion 1.5, uniformity 1, outliers 0.5, zero inflation 1,
    ##        factors/observations ratio 1, levels of random effects 1.
    ## ----------------------------------------------------------------------
    ## Author: Coline ROYAUX, 26 june 2020

    ## DHARMa doesn't work with quasi distributions: say so before leaving
    if (length(grep("quasi", obj_lm$family)) > 0) {
        cat("Models with quasi distributions can't be rated for now")
        return(NA)
    }

    rate <- 0
    detres <- list(complete_plan = NA, balanced_plan = NA, NA_proportion_OK = NA, no_residual_dispersion = NA, uniform_residuals = NA, outliers_proportion_OK = NA, no_zero_inflation = NA, observation_factor_ratio_OK = NA, enough_levels_random_effect = NA, rate = NA)

    #### d_ata criterions ####

    ## Plan : number of observations for each combination of factor levels
    plan <- as.data.frame(table(data[, list_fact]))
    n_comb <- nrow(plan)

    if (sum(plan$Freq == 0) < n_comb * 0.1) { # +0.5 if less than 10% of possible factor's level combinations aren't represented in the sampling scheme
        rate <- rate + 0.5
        detres$complete_plan <- TRUE

        ## +0.5 if the most represented number of observations is shared by
        ## more than 90% of the possible combinations
        if (max(table(plan$Freq)) > n_comb * 0.9) {
            rate <- rate + 0.5
            detres$balanced_plan <- TRUE
        } else {
            detres$balanced_plan <- FALSE
        }
    } else {
        detres$complete_plan <- FALSE
        detres$balanced_plan <- FALSE
    }

    ## +1 if less than 10% of the lines of the dataframe bear a NA
    na_ok <- nrow(data) - nrow(na.omit(data)) < nrow(data) * 0.1
    if (na_ok) {
        rate <- rate + 1
    }
    detres["NA_proportion_OK"] <- na_ok

    #### Model criterions ####

    sim_res <- DHARMa::simulateResiduals(obj_lm)

    ## capture.output() keeps what testResiduals() prints out of the output
    capture.output(test_res <- DHARMa::testResiduals(sim_res))
    test_zero <- DHARMa::testZeroInflation(sim_res)

    ## dispersion of residuals : +1.5 if test not significative
    if (test_res$dispersion$p.value > 0.05) {
        rate <- rate + 1.5
        detres$no_residual_dispersion <- TRUE
    } else {
        detres$no_residual_dispersion <- FALSE
    }

    ## uniformity of residuals : +1 if test not significative
    if (test_res$uniformity$p.value > 0.05) {
        rate <- rate + 1
        detres$uniform_residuals <- TRUE
    } else {
        detres$uniform_residuals <- FALSE
    }

    ## residuals outliers : +0.5 if test not significative
    if (test_res$outliers$p.value > 0.05) {
        rate <- rate + 0.5
        detres["outliers_proportion_OK"] <- TRUE
    } else {
        detres["outliers_proportion_OK"] <- FALSE
    }

    ## Zero inflation test : +1 if test not significative
    if (test_zero$p.value > 0.05) {
        rate <- rate + 1
        detres$no_zero_inflation <- TRUE
    } else {
        detres$no_zero_inflation <- FALSE
    }

    ## Factors/observations ratio : +1 if quantity of factors is less than
    ## 10% of the quantity of complete observations
    if (length(list_fact) / nrow(na.omit(data)) < 0.1) {
        rate <- rate + 1
        detres["observation_factor_ratio_OK"] <- TRUE
    } else {
        detres["observation_factor_ratio_OK"] <- FALSE
    }

    ## Random effects (glmmTMB models only) : +1 if every random effect has
    ## more than 10 levels
    if (length(grep("^glmmTMB", obj_lm$call)) > 0) {
        rand_names <- names(summary(obj_lm)$varcor$cond)
        nlev_rand <- vapply(rand_names, FUN = function(fact) {
            length(unlist(unique(data[, fact])))
        }, FUN.VALUE = integer(1))

        if (all(nlev_rand > 10)) {
            rate <- rate + 1
            detres$enough_levels_random_effect <- TRUE
        } else {
            detres$enough_levels_random_effect <- FALSE
        }
    }

    detres$rate <- rate

    if (details) {
        return(detres)
    }
    return(rate)
}
1341
1342 ######################################### end of the function note_glm_f
1343
1344 ######################################### start of the function note_glms_f called by glm_species and glm_community
1345
note_glms_f <- function(tab_rate, expr_lm, obj_lm, file_out = FALSE) {
    ## Purpose: Note your GLM analysis : global rate for a set of analyses
    ##          and, on demand, a written report
    ## ----------------------------------------------------------------------
    ## Arguments: tab_rate : rates table from note_glm_f (first column =
    ##                       analysis name, one column per criterion, "rate")
    ##            expr_lm : GLM expression assessed
    ##            obj_lm : GLM object
    ##            file_out : Output as file ? else global rate only
    ## ----------------------------------------------------------------------
    ## Value: the global rate when file_out is FALSE; otherwise nothing, the
    ##        report is appended to "RatingGLM.txt" in the working directory.
    ##        For quasi distributions only a message is appended to that file.
    ## ----------------------------------------------------------------------
    ## Author: Coline ROYAUX, 26 june 2020
    namefile <- "RatingGLM.txt"

    ## DHARMa doesn't work with quasi distributions
    if (length(grep("quasi", obj_lm$family)) > 0) {
        cat("Models with quasi distributions can't be rated for now", file = namefile, append = TRUE)
        return(invisible(NULL))
    }

    is_glmm <- length(grep("^glmmTMB", obj_lm$call)) > 0
    rates <- na.omit(tab_rate[, "rate"])

    ## Global rate : median of the individual rates, +1 when the median
    ## reaches the threshold and +1 when the 90% quantile reaches it.
    ## The threshold is 6 with random effects and 5 without.
    threshold <- if (is_glmm) 6 else 5
    rate_m <- median(rates)

    if (median(rates) >= threshold) {
        rate_m <- rate_m + 1
    }

    if (quantile(rates, probs = 0.9) >= threshold) {
        rate_m <- rate_m + 1
    }

    if (!file_out) {
        return(rate_m)
    }

    ## ---- helpers working on the columns themselves ------------------------
    ## (apply() on a data frame goes through a character matrix in which
    ## logical values may be padded, e.g. " TRUE", and never compare equal)

    ## "yes" when the criterion is TRUE, "no" when FALSE or NA
    yes_no <- function(value) {
        if (isTRUE(as.logical(value))) "yes" else "no"
    }

    ## Proportion of FALSE among the non-missing values of a criterion
    failure_share <- function(criterion) {
        values <- tab_rate[, criterion]
        values <- values[!is.na(values)]
        if (length(values) == 0) {
            return(0)
        }
        sum(as.logical(values) %in% FALSE) / length(values)
    }

    sum_lm <- summary(obj_lm)

    cat("###########################################################################",
        "\n########################### Analysis evaluation ###########################",
        "\n###########################################################################", file = namefile, fill = 1, append = TRUE)

    ## Informations on model :
    cat("\n\n######################################### \nFitted model:", file = namefile, fill = 1, append = TRUE)
    cat("\t", deparse(expr_lm), "\n\n", file = namefile, sep = "", append = TRUE)
    cat("Family: ", sum_lm$family[[1]],
        file = namefile, append = TRUE)
    cat("\n\nNumber of analysis: ", nrow(tab_rate), file = namefile, append = TRUE)

    ## Global rate :
    cat("\n\n######################################### \nGlobal rate for all analysis:",
        "\n\n", rate_m, "out of 10", file = namefile, append = TRUE)

    ## details on every GLM :
    cat("\n\n######################################### \nDetails on every analysis:\n\n", file = namefile, append = TRUE)
    cat("Analysis\tC1\tC2\tC3\tC4\tC5\tC6\tC7\tC8\tC9\tFinal rate", file = namefile, append = TRUE)

    other_criteria <- c("balanced_plan", "NA_proportion_OK", "no_residual_dispersion", "uniform_residuals", "outliers_proportion_OK", "no_zero_inflation", "observation_factor_ratio_OK", "enough_levels_random_effect")

    for (i in seq_len(nrow(tab_rate))) {
        cat("\n", as.character(tab_rate[i, 1]), paste0("\t", yes_no(tab_rate[i, "complete_plan"])), file = namefile, append = TRUE)

        for (criterion in other_criteria) {
            cat(paste0("\t", yes_no(tab_rate[i, criterion])), file = namefile, append = TRUE)
        }

        cat("\t", tab_rate[i, "rate"], "/ 8", file = namefile, append = TRUE)
    }

    cat("\n\nC1: Complete plan?\nC2: Balanced plan?\nC3: Few NA?\nC4: Regular dispersion?\nC5: Uniform residuals?\nC6: Regular outliers proportion?\nC7: No zero-inflation?\nC8: Good observation/factor ratio?\nC9: Enough levels on random effect?", file = namefile, append = TRUE)

    ## Red flags - advice :
    cat("\n\n######################################### \nRed flags - advice:\n\n", file = namefile, append = TRUE)

    na_values <- tab_rate[, "NA_proportion_OK"]
    if (all(as.logical(na_values[!is.na(na_values)]) %in% FALSE)) {
        cat("\n", "\t- More than 10% of lines of your dataset contains NAs", file = namefile, append = TRUE)
    }

    if (failure_share("no_residual_dispersion") > 0.5) {
        cat("\n", "\t- More than 50% of your analyses are over- or under- dispersed : Try with another distribution family", file = namefile, append = TRUE)
    }

    if (failure_share("uniform_residuals") > 0.5) {
        cat("\n", "\t- More than 50% of your analyses haven't an uniform distribution of residuals : Try with another distribution family", file = namefile, append = TRUE)
    }

    if (failure_share("outliers_proportion_OK") > 0.5) {
        cat("\n", "\t- More than 50% of your analyses have too much outliers : Try with another distribution family or try to select or filter your data", file = namefile, append = TRUE)
    }

    if (failure_share("no_zero_inflation") > 0.5) {
        cat("\n", "\t- More than 50% of your analyses have zero inflation : Try to select or filter your data", file = namefile, append = TRUE)
    }

    if (failure_share("observation_factor_ratio_OK") > 0.5) {
        cat("\n", "\t- More than 50% of your analyses have not enough observations for the amount of factors : Try to use less factors in your analysis or try to use another separation factor", file = namefile, append = TRUE)
    }

    if (any(as.logical(tab_rate[, "enough_levels_random_effect"]) %in% FALSE) && is_glmm) {
        cat("\n", "\t- Random effect hasn't enough levels to be robust : If it has less than ten levels remove the random effect", file = namefile, append = TRUE)
    }

    invisible(NULL)
}
1461
1462 ######################################### end of the function note_glms_f
1463
1464 ######################################### start of the function info_stats_f called by glm_species and glm_community
1465
info_stats_f <- function(filename, d_ata, agreg_level = c("species", "unitobs"), type = c("graph", "stat"),
                         metrique, fact_graph, fact_graph_sel, list_fact, list_fact_sel) {
    ## Purpose: informations and simple statistics written to a text file
    ## ----------------------------------------------------------------------
    ## Arguments: filename : name of file
    ##            d_ata : input data, either one table or a list of tables
    ##                    (one per level of the splitting factor)
    ##            agreg_level : aggregation level
    ##            type : type of function calling
    ##            metrique : selected metric
    ##            fact_graph : selection factor (not used in the body)
    ##            fact_graph_sel : list of factors levels selected for this
    ##                             factor, used as headlines when d_ata is a list
    ##            list_fact : list of grouping factors
    ##            list_fact_sel : list of factors levels selected for these
    ##                            factors (not used in the body)
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 10 sept. 2012, 15:26 modified by Coline ROYAUX 04 june 2020

    ## Open file :
    f_ile <- file(description = filename,
                  open = "w", encoding = "latin1")

    ## Close it whatever happens (normal end or error) :
    on.exit(close(f_ile), add = TRUE)

    ## Metrics and factors infos :
    print_selection_info_f(metrique = metrique,
                           list_fact = list_fact,
                           f_ile = f_ile,
                           agreg_level = agreg_level, type = type)

    ## statistics :
    ## inherits() gives a single TRUE/FALSE even when d_ata has several
    ## classes, unlike class(d_ata) == "list"
    if (inherits(d_ata, "list")) {
        cat("\n###################################################",
            "\nStatistics per level of splitting factor:\n",
            sep = "", file = f_ile, append = TRUE)

        for (i in seq_along(d_ata)) {
            print_stats_f(d_ata = d_ata[[i]], metrique = metrique, list_fact = list_fact, f_ile = f_ile,
                          headline = fact_graph_sel[i])
        }
    } else {
        print_stats_f(d_ata = d_ata, metrique = metrique, list_fact = list_fact, f_ile = f_ile,
                      headline = NULL)
    }

    invisible(NULL)
}
1517
1518 ######################################### end of the function info_stats_f
1519
1520
1521 ######################################### start of the function print_selection_info_f called by info_stats_f
1522
print_selection_info_f <- function(metrique, list_fact,
                                   f_ile,
                                   agreg_level = c("species", "unitobs"), type = c("graph", "stat")) {
    ## Purpose: Write data informations
    ## ----------------------------------------------------------------------
    ## Arguments: metrique : chosen metric
    ##            list_fact : factor's list
    ##            f_ile : results file (file name or open connection, as
    ##                    accepted by cat())
    ##            agreg_level : aggregation level
    ##            type : function type ("graph" or "stat")
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 11 sept. 2012, 10:41 modified by Coline ROYAUX 04 june 2020

    ## if() and switch() below need a single value: when an argument is left
    ## at its vector default, its first choice is used
    agreg_level <- agreg_level[1]
    type <- type[1]

    cat("\n##################################################\n",
        "Metrics and factors (and possible units/selections):\n",
        sep = "", file = f_ile, append = TRUE)

    ## metric info :
    cat("\n Metrics:", metrique,
        "\n", file = f_ile, append = TRUE)

    ## Clustering factors :
    if (is.element(agreg_level, c("spCL_unitobs", "spCL_espece", "spSpecies", "spEspece",
                                  "spUnitobs", "spUnitobs(CL)"))) {
        type <- "spatialGraph"
    }

    factors_header <- switch(type,
                             "graph" = "\nGrouping factor(s): \n * ",
                             "stat" = "\nAnalyses factor(s): \n * ",
                             "spatialGraph" = "\nSpatial aggregation factor(s): \n * ")

    ## One bullet per factor
    cat(factors_header, paste(list_fact, collapse = "\n * "), "\n",
        sep = "", file = f_ile, append = TRUE)
}
1557
1558 ######################################### end of the function print_selection_info_f
1559
1560
1561 ######################################### start of the function print_stats_f called by info_stats_f
1562
print_stats_f <- function(d_ata, metrique, list_fact, f_ile, headline = NULL) {
    ## Purpose: Write general statistics table
    ## ----------------------------------------------------------------------
    ## Arguments: d_ata : Analysis data
    ##            metrique : metric's name (column of d_ata)
    ##            list_fact : Factor's list (columns of d_ata), NULL to skip
    ##                        the statistics per combination of levels
    ##            f_ile : Simple statistics file (file name or connection)
    ##            headline : optional title written above the statistics
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 11 sept. 2012, 10:09 modified by Coline ROYAUX 04 june 2020

    ## Header :
    if (!is.null(headline)) {
        cat("\n", rep("#", nchar(headline) + 3), "\n",
            "## ", headline, "\n",
            sep = "", file = f_ile, append = TRUE)
    }

    cat("\n########################\nBase statistics:\n\n", file = f_ile, append = TRUE)

    ## Columns are extracted by name rather than by evaluating the name as
    ## R code, so that any column name is accepted
    metric_values <- d_ata[[metrique]]

    capture.output(print(summary_fr(metric_values)), file = f_ile, append = TRUE)

    if (!is.null(list_fact)) {
        cat("\n#########################################",
            "\nStatistics per combination of factor levels:\n\n", file = f_ile, sep = "", append = TRUE)

        ## One label per observation : its factor levels joined by "."
        level_cross <- do.call(paste,
                               c(lapply(list_fact,
                                        function(fact) d_ata[[fact]]),
                                 sep = "."))

        ## Compute summary for each existing factor's cross :
        res <- tapply(metric_values, INDEX = level_cross, FUN = summary_fr)

        ## results in table
        capture.output(print(do.call(rbind, res)),
                       file = f_ile, append = TRUE)
    }

    ## empty line :
    cat("\n", file = f_ile, append = TRUE)
}
1605
1606 ######################################### end of the function print_stats_f
1607
1608
1609 ######################################### start of the function summary_fr called by print_stats_f
summary_fr <- function(object, digits = max(3, getOption("digits") - 3), ...) {
    ## Purpose: Adding SD and N to summary
    ## ----------------------------------------------------------------------
    ## Arguments: object : numeric vector to summarise
    ##            digits : significant digits kept for the standard deviation
    ##                     (also handed over to summary())
    ##            ... : further arguments handed over to summary()
    ## ----------------------------------------------------------------------
    ## Value: the elements of summary(object) followed by "sd" (standard
    ##        deviation) and "N" (number of values, NA included)
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 13 sept. 2012, 15:47 modified by Coline ROYAUX 04 june 2020

    ## Only numeric input makes sense here :
    if (!is.numeric(object)) {
        stop("Programming error")
    }

    ## Usual summary, then the two extra statistics :
    usual_stats <- summary(object = object, digits, ...)
    std_dev <- signif(sd(x = object), digits = digits)
    n_values <- length(object)

    c(usual_stats, "sd" = std_dev, "N" = n_values)
}
1624
1625 ######################################### end of the function summary_fr