Mercurial > repos > ecology > pampa_plotglm
comparison FunctPAMPAGalaxy.r @ 0:3ab852a7ff53 draft
"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 04381ca7162ec3ec68419e308194b91d11cacb04"
author | ecology |
---|---|
date | Mon, 16 Nov 2020 11:02:43 +0000 |
parents | |
children | 5ddca052c314 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3ab852a7ff53 |
---|---|
1 #Rscript | |
2 | |
3 | |
4 ################################################################################################################################## | |
5 ####################### PAMPA Galaxy tools functions : Calculate metrics, compute GLM and plot ################################# | |
6 ################################################################################################################################## | |
7 | |
8 #### Based on Yves Reecht R script | |
9 #### Modified by Coline ROYAUX for integrating within Galaxy-E | |
10 | |
11 ######################################### start of the function fact.def.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
12 ####### Define the finest aggregation with the observation table | |
13 | |
14 fact_det_f <- function(obs, | |
15 size_class = "size.class", | |
16 code_species = "species.code", | |
17 unitobs = "observation.unit") { | |
18 if (any(is.element(c(size_class), colnames(obs))) && all(! is.na(obs[, size_class]))) { | |
19 factors <- c(unitobs, code_species, size_class) | |
20 }else{ | |
21 factors <- c(unitobs, code_species) | |
22 } | |
23 return(factors) | |
24 } | |
25 | |
26 ######################################### end of the function fact.def.f | |
27 | |
28 ######################################### start of the function def_typeobs_f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
29 ####### Define observation type from colnames | |
30 | |
31 def_typeobs_f <- function(obs) { | |
32 if (any(is.element(c("rotation", "rot", "rotate"), colnames(obs)))) { | |
33 obs_type <- "SVR" | |
34 }else{ | |
35 obs_type <- "other" | |
36 } | |
37 return(obs_type) | |
38 } | |
39 ######################################### end of the function fact.def.f | |
40 | |
41 ######################################### start of the function create_unitobs called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
42 ####### Create unitobs column when inexistant | |
43 create_unitobs <- function(data, year = "year", location = "location", unitobs = "observation.unit") { | |
44 if (is.element(paste(unitobs), colnames(data))) { | |
45 unitab <- data | |
46 }else{ | |
47 | |
48 unitab <- tidyr::unite(data, col = "observation.unit", c(year, location)) | |
49 } | |
50 return(unitab) | |
51 } | |
52 ######################################### start of the function create_unitobs | |
53 | |
54 ######################################### start of the function create_year_location called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
55 ####### separate unitobs column when existant | |
56 create_year_location <- function(data, year = "year", location = "location", unitobs = "observation.unit") { | |
57 if (all(grepl("[1-2][0|8|9][0-9]{2}_.*", data[, unitobs])) == TRUE) { | |
58 tab <- tidyr::separate(data, col = unitobs, into = c(year, location), sep = "_") | |
59 }else{ | |
60 if (all(grepl("[A-Z]{2}[0-9]{2}.*", data[, unitobs]) == TRUE)) { | |
61 tab <- tidyr::separate(data, col = unitobs, into = c("site1", year, "obs"), sep = c(2, 4)) | |
62 tab <- tidyr::unite(tab, col = location, c("site1", "obs")) | |
63 }else{ | |
64 tab <- data | |
65 } | |
66 } | |
67 | |
68 tab <- cbind(tab, observation.unit = data[, unitobs]) | |
69 | |
70 return(tab) | |
71 } | |
72 ######################################### start of the function create_year_location | |
73 | |
74 ######################################### start of the function check_file called by every Galaxy Rscripts | |
75 | |
76 check_file <- function(dataset, err_msg, vars, nb_vars) { | |
77 | |
78 ## Purpose: General function to check integrity of input file. Will | |
79 ## check numbers and contents of variables(colnames). | |
80 ## return an error message and exit if mismatch detected | |
81 ## ---------------------------------------------------------------------- | |
82 ## Arguments: dataset : dataset name | |
83 ## err_msg : output error | |
84 ## vars : expected name of variables | |
85 ## nb_vars : expected number of variables | |
86 ## ---------------------------------------------------------------------- | |
87 ## Author: Alan Amosse, Benjamin Yguel | |
88 | |
89 if (ncol(dataset) < nb_vars) { #checking for right number of columns in the file if not = error message | |
90 cat("\nerr nb var\n") | |
91 stop(err_msg, call. = FALSE) | |
92 } | |
93 | |
94 for (i in vars) { | |
95 if (!(i %in% names(dataset))) { #checking colnames | |
96 stop(err_msg, call. = FALSE) | |
97 } | |
98 } | |
99 } | |
100 | |
101 ######################################### end of the function check_file | |
102 | |
103 | |
104 ######################################### start of the function stat_rotations_nb_f called by calc_numbers_f | |
105 | |
106 stat_rotations_nb_f <- function(factors, obs) { | |
107 ## Purpose: Computing abundance statistics by rotation (max, sd) | |
108 ## on SVR data | |
109 ## ---------------------------------------------------------------------- | |
110 ## Arguments: factors : Names of aggregation factors | |
111 ## obs : observation data | |
112 ## ---------------------------------------------------------------------- | |
113 ## Author: Yves Reecht, Date: 29 oct. 2012, 16:01 modified by Coline ROYAUX 04 june 2020 | |
114 | |
115 ## Identification of valid rotations : | |
116 if (is.element("observation.unit", factors)) { | |
117 ## valid rotations (empty must be there as well) : | |
118 rotations <- tapply(obs$rotation, | |
119 as.list(obs[, c("observation.unit", "rotation"), drop = FALSE]), | |
120 function(x)length(x) > 0) | |
121 | |
122 ## Changing NA rotations in FALSE : | |
123 rotations[is.na(rotations)] <- FALSE | |
124 } | |
125 | |
126 ## ########################################################### | |
127 ## Abundance per rotation at chosen aggregation factors : | |
128 nombres_rot <- tapply(obs$number, | |
129 as.list(obs[, c(factors, "rotation"), drop = FALSE]), | |
130 function(x, ...) { | |
131 ifelse(all(is.na(x)), NA, sum(x, ...)) | |
132 }, | |
133 na.rm = TRUE) | |
134 | |
135 ## If valid rotation NA are considered 0 : | |
136 nombres_rot <- sweep(nombres_rot, | |
137 match(names(dimnames(rotations)), names(dimnames(nombres_rot)), nomatch = NULL), | |
138 rotations, # Tableau des secteurs valides (booléens). | |
139 function(x, y) { | |
140 x[is.na(x) & y] <- 0 # Lorsque NA et secteur valide => 0. | |
141 return(x) | |
142 }) | |
143 | |
144 ## ################################################## | |
145 ## Statistics : | |
146 | |
147 ## Means : | |
148 nb_mean <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)), | |
149 function(x, ...) { | |
150 ifelse(all(is.na(x)), NA, mean(x, ...)) | |
151 }, na.rm = TRUE) | |
152 | |
153 ## Maxima : | |
154 nb_max <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)), | |
155 function(x, ...) { | |
156 ifelse(all(is.na(x)), NA, max(x, ...)) | |
157 }, na.rm = TRUE) | |
158 | |
159 ## SD : | |
160 nb_sd <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)), | |
161 function(x, ...) { | |
162 ifelse(all(is.na(x)), NA, sd(x, ...)) | |
163 }, na.rm = TRUE) | |
164 | |
165 ## Valid rotations count : | |
166 nombres_rotations <- apply(rotations, 1, sum, na.rm = TRUE) | |
167 | |
168 ## Results returned as list : | |
169 return(list(nb_mean = nb_mean, nb_max = nb_max, nb_sd = nb_sd, | |
170 nombres_rotations = nombres_rotations, nombresTot = nombres_rot)) | |
171 } | |
172 | |
173 ######################################### end of the function stat_rotations_nb_f | |
174 | |
175 ######################################### start of the function calc_nb_default_f called by calc_numbers_f | |
176 | |
177 calc_nb_default_f <- function(obs, | |
178 factors = c("observation.unit", "species.code", "size.class"), | |
179 nb_name = "number") { | |
180 ## Purpose : Compute abundances at finest aggregation | |
181 ## --------------------------------------------------------------------- | |
182 ## Arguments: obs : observation table | |
183 ## factors : aggregation factors | |
184 ## nb_name : name of abundance column. | |
185 ## | |
186 ## Output: array with ndimensions = nfactors. | |
187 ## ---------------------------------------------------------------------- | |
188 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:38 modified by Coline ROYAUX 04 june 2020 | |
189 | |
190 ## Sum individuals number : | |
191 nbr <- tapply(obs[, nb_name], | |
192 as.list(obs[, factors]), | |
193 sum, na.rm = TRUE) | |
194 | |
195 ## Absences as "true zero" : | |
196 nbr[is.na(nbr)] <- 0 | |
197 | |
198 return(nbr) | |
199 } | |
200 | |
201 ######################################### end of the function calc_nb_default_f | |
202 | |
203 ######################################### start of the function calc_numbers_f | |
204 | |
205 calc_numbers_f <- function(obs, obs_type = "", factors = c("observation.unit", "species.code", "size.class"), nb_name = "number") { | |
206 ## Purpose: Produce data.frame used as table from output of calc_nb_default_f(). | |
207 ## ---------------------------------------------------------------------- | |
208 ## Arguments: obs : observation table | |
209 ## obs_type : Type of observation (SVR, LIT, ...) | |
210 ## factors : aggregation factors | |
211 ## nb_name : name of abundance column | |
212 ## | |
213 ## Output: data.frame with (N aggregation factors + 1) columns | |
214 ## ---------------------------------------------------------------------- | |
215 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:46 modified by Coline ROYAUX 04 june 2020 | |
216 | |
217 if (obs_type == "SVR") { | |
218 ## Compute SVR abundances statistics : | |
219 stat_rotations <- stat_rotations_nb_f(factors = factors, | |
220 obs = obs) | |
221 | |
222 ## Mean for rotating videos (3 rotations at most times) : | |
223 nbr <- stat_rotations[["nb_mean"]] | |
224 | |
225 }else{ | |
226 | |
227 nbr <- calc_nb_default_f(obs, factors, nb_name) | |
228 } | |
229 | |
230 res <- as.data.frame(as.table(nbr), responseName = nb_name) | |
231 | |
232 if (is.element("size.class", colnames(res))) { | |
233 res$size.class[res$size.class == ""] <- NA | |
234 } | |
235 | |
236 ## If integer abundances : | |
237 if (isTRUE(all.equal(res[, nb_name], as.integer(res[, nb_name])))) { | |
238 res[, nb_name] <- as.integer(res[, nb_name]) | |
239 } | |
240 | |
241 if (obs_type == "SVR") { | |
242 ## statistics on abundances : | |
243 res[, "number.max"] <- as.vector(stat_rotations[["nb_max"]]) | |
244 res[, "number.sd"] <- as.vector(stat_rotations[["nb_sd"]]) | |
245 | |
246 } | |
247 | |
248 return(res) | |
249 } | |
250 | |
251 ######################################### end of the function calc_numbers_f | |
252 | |
253 ######################################### start of the function pres_abs_f called by calc_biodiv_f | |
254 | |
255 pres_abs_f <- function(nombres, logical = FALSE) { | |
256 ## Purpose: Compute presence absence from abundances | |
257 ## ---------------------------------------------------------------------- | |
258 ## Arguments: nombres : vector of individuals count. | |
259 ## logical : (boolean) results as boolean or 0/1 ? | |
260 ## ---------------------------------------------------------------------- | |
261 ## Author: Yves Reecht, Date: 29 oct. 2010, 10:20 modified by Coline ROYAUX 04 june 2020 | |
262 | |
263 if (any(nombres < 0, na.rm = TRUE)) { | |
264 stop("Negative abundances!") | |
265 } | |
266 | |
267 if (logical) { | |
268 return(nombres > 0) | |
269 }else{ | |
270 nombres[nombres > 0] <- 1 | |
271 return(nombres) | |
272 } | |
273 } | |
274 | |
275 ######################################### end of the function pres_abs_f | |
276 | |
277 ######################################### start of the function bettercbind called by agregations_generic_f | |
278 | |
279 bettercbind <- function(..., df_list = NULL, deparse.level = 1) { | |
280 ## Purpose: Apply cbind to data frame with mathcing columns but without | |
281 ## redundancies. | |
282 ## ---------------------------------------------------------------------- | |
283 ## Arguments: same as cbind... | |
284 ## df_list : data.frames list | |
285 ## ---------------------------------------------------------------------- | |
286 ## Author: Yves Reecht, Date: 17 janv. 2012, 21:10 modified by Coline ROYAUX 04 june 2020 | |
287 | |
288 if (is.null(df_list)) { | |
289 df_list <- list(...) | |
290 } | |
291 | |
292 return(do.call(cbind, | |
293 c(list(df_list[[1]][, c(tail(colnames(df_list[[1]]), -1), | |
294 head(colnames(df_list[[1]]), 1))]), | |
295 lapply(df_list[- 1], | |
296 function(x, coldel) { | |
297 return(x[, !is.element(colnames(x), | |
298 coldel), | |
299 drop = FALSE]) | |
300 }, | |
301 coldel = colnames(df_list[[1]])), | |
302 deparse.level = deparse.level))) | |
303 } | |
304 | |
305 ######################################### end of the function bettercbind | |
306 | |
307 ######################################### start of the function agregation_f called by agregations_generic_f | |
308 | |
309 agregation_f <- function(metric, d_ata, factors, cas_metric, | |
310 nb_name = "number") { | |
311 ## Purpose: metric aggregation | |
312 ## ---------------------------------------------------------------------- | |
313 ## Arguments: metric: colnames of chosen metric | |
314 ## d_ata: Unaggregated data table | |
315 ## factors: aggregation factors vector | |
316 ## cas_metric: named vector of observation types depending | |
317 ## on chosen metric | |
318 ## nb_name : abundance column name | |
319 ## ---------------------------------------------------------------------- | |
320 ## Author: Yves Reecht, Date: 20 déc. 2011, 14:29 modified by Coline ROYAUX 04 june 2020 | |
321 | |
322 switch(cas_metric[metric], | |
323 "sum" = { | |
324 res <- tapply(d_ata[, metric], | |
325 as.list(d_ata[, factors, drop = FALSE]), | |
326 function(x) { | |
327 ifelse(all(is.na(x)), | |
328 NA, | |
329 sum(x, na.rm = TRUE)) | |
330 }) | |
331 }, | |
332 "w.mean" = { | |
333 res <- tapply(seq_len(nrow(d_ata)), | |
334 as.list(d_ata[, factors, drop = FALSE]), | |
335 function(ii) { | |
336 ifelse(all(is.na(d_ata[ii, metric])), | |
337 NA, | |
338 weighted.mean(d_ata[ii, metric], | |
339 d_ata[ii, nb_name], | |
340 na.rm = TRUE)) | |
341 }) | |
342 }, | |
343 "w.mean.colonies" = { | |
344 res <- tapply(seq_len(nrow(d_ata)), | |
345 as.list(d_ata[, factors, drop = FALSE]), | |
346 function(ii) { | |
347 ifelse(all(is.na(d_ata[ii, metric])), | |
348 NA, | |
349 weighted.mean(d_ata[ii, metric], | |
350 d_ata[ii, "colonies"], | |
351 na.rm = TRUE)) | |
352 }) | |
353 }, | |
354 "w.mean.prop" = { | |
355 res <- tapply(seq_len(nrow(d_ata)), | |
356 as.list(d_ata[, factors, drop = FALSE]), | |
357 function(ii) { | |
358 ifelse(all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, "nombre.tot"], na.rm = TRUE) == 0, | |
359 NA, | |
360 ifelse(all(na.omit(d_ata[ii, metric]) == 0), | |
361 0, | |
362 (sum(d_ata[ii, nb_name][!is.na(d_ata[ii, metric])], na.rm = TRUE) / | |
363 sum(d_ata[ii, "nombre.tot"], na.rm = TRUE)) * | |
364 ## Correction if size class isn't an aggregation factor | |
365 ## (otherwise value divided by number of present classes) : | |
366 ifelse(is.element("size.class", factors), | |
367 100, | |
368 100 * length(unique(d_ata$size.class))))) | |
369 }) | |
370 | |
371 }, | |
372 "w.mean.prop.bio" = { | |
373 res <- tapply(seq_len(nrow(d_ata)), | |
374 as.list(d_ata[, factors, drop = FALSE]), | |
375 function(ii) { | |
376 ifelse(all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, "tot.biomass"], na.rm = TRUE) == 0, | |
377 NA, | |
378 ifelse(all(na.omit(d_ata[ii, metric]) == 0), | |
379 0, | |
380 (sum(d_ata[ii, "biomass"][!is.na(d_ata[ii, metric])], na.rm = TRUE) / | |
381 sum(d_ata[ii, "tot.biomass"], na.rm = TRUE)) * | |
382 ## Correction if size class isn't an aggregation factor | |
383 ## (otherwise value divided by number of present classes) : | |
384 ifelse(is.element("size.class", factors), | |
385 100, | |
386 100 * length(unique(d_ata$size.class))))) | |
387 }) | |
388 | |
389 }, | |
390 "pres" = { | |
391 res <- tapply(d_ata[, metric], | |
392 as.list(d_ata[, factors, drop = FALSE]), | |
393 function(x) { | |
394 ifelse(all(is.na(x)), # When only NAs. | |
395 NA, | |
396 ifelse(any(x > 0, na.rm = TRUE), # Otherwise... | |
397 1, # ... presence if at least one observation in the group. | |
398 0)) | |
399 }) | |
400 }, | |
401 "nbMax" = { | |
402 | |
403 ## Sum by factor cross / rotation : | |
404 nb_tmp2 <- apply(nb_tmp, | |
405 which(is.element(names(dimnames(nb_tmp)), c(factors, "rotation"))), | |
406 function(x) { | |
407 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) | |
408 }) | |
409 | |
410 ## Sum by factor cross : | |
411 res <- as.array(apply(nb_tmp2, | |
412 which(is.element(names(dimnames(nb_tmp)), factors)), | |
413 function(x) { | |
414 ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE)) | |
415 })) | |
416 }, | |
417 "nbSD" = { | |
418 | |
419 ## Sum by factor cross / rotation : | |
420 nb_tmp2 <- apply(nb_tmp, | |
421 which(is.element(names(dimnames(nb_tmp)), c(factors, "rotation"))), | |
422 function(x) { | |
423 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) | |
424 }) | |
425 | |
426 ## Sum by factor cross : | |
427 res <- as.array(apply(nb_tmp2, | |
428 which(is.element(names(dimnames(nb_tmp)), factors)), | |
429 function(x) { | |
430 ifelse(all(is.na(x)), NA, sd(x, na.rm = TRUE)) | |
431 })) | |
432 }, | |
433 "densMax" = { | |
434 | |
435 ## Sum by factor cross / rotation : | |
436 dens_tmp2 <- apply(dens_tmp, | |
437 which(is.element(names(dimnames(dens_tmp)), c(factors, "rotation"))), | |
438 function(x) { | |
439 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) | |
440 }) | |
441 | |
442 ## Sum by factor cross : | |
443 res <- as.array(apply(dens_tmp2, | |
444 which(is.element(names(dimnames(dens_tmp)), factors)), | |
445 function(x) { | |
446 ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE)) | |
447 })) | |
448 }, | |
449 "densSD" = { | |
450 | |
451 ## Sum by factor cross / rotation : | |
452 dens_tmp2 <- apply(dens_tmp, | |
453 which(is.element(names(dimnames(dens_tmp)), c(factors, "rotation"))), | |
454 function(x) { | |
455 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) | |
456 }) | |
457 | |
458 ## Sum by factor cross : | |
459 res <- as.array(apply(dens_tmp2, | |
460 which(is.element(names(dimnames(dens_tmp)), factors)), | |
461 function(x) { | |
462 ifelse(all(is.na(x)), NA, sd(x, na.rm = TRUE)) | |
463 })) | |
464 }, | |
465 "%.nesting" = { | |
466 res <- tapply(seq_len(nrow(d_ata)), | |
467 as.list(d_ata[, factors, drop = FALSE]), | |
468 function(ii) { | |
469 ifelse(all(is.na(d_ata[ii, metric])), | |
470 NA, | |
471 weighted.mean(d_ata[ii, metric], | |
472 d_ata[ii, "readable.tracks"], | |
473 na.rm = TRUE)) | |
474 }) | |
475 }, | |
476 stop("Not implemented!") | |
477 ) | |
478 | |
479 ## dimension names | |
480 names(dimnames(res)) <- c(factors) | |
481 | |
482 ## Transformation to long format : | |
483 reslong <- as.data.frame(as.table(res), responseName = metric) | |
484 reslong <- reslong[, c(tail(colnames(reslong), 1), head(colnames(reslong), -1))] # metric first | |
485 | |
486 return(reslong) | |
487 } | |
488 | |
489 ######################################### end of the function agregation_f | |
490 | |
491 ######################################### start of the function agregations_generic_f called y calc_biodiv_f in FucntExeCalcCommIndexesGalaxy.r | |
492 | |
493 agregations_generic_f <- function(d_ata, metrics, factors, list_fact = NULL, unit_sp_sz = NULL, unit_sp = NULL, | |
494 nb_name = "number") { | |
495 ## Purpose: Aggregate data | |
496 ## ---------------------------------------------------------------------- | |
497 ## Arguments: d_ata : data set | |
498 ## metrics : aggregated metric | |
499 ## factors : aggregation factors | |
500 ## list_fact : other factors to aggregate and add to output | |
501 ## unit_sp_sz : Metrics table by unitobs/species/Size Class | |
502 ## unit_sp : Metrics table by unitobs/species | |
503 ## nb_name : abundance colname | |
504 ## | |
505 ## Output : aggregated data frame | |
506 ## ---------------------------------------------------------------------- | |
507 ## Author: Yves Reecht, Date: 18 oct. 2010, 15:47 modified by Coline ROYAUX 04 june 2020 | |
508 | |
509 ## trt depending on metric type : | |
510 cas_metric <- c("number" = "sum", | |
511 "mean.length" = "w.mean", | |
512 "taille_moy" = "w.mean", | |
513 "biomass" = "sum", | |
514 "Biomass" = "sum", | |
515 "weight" = "sum", | |
516 "mean.weight" = "w.mean", | |
517 "density" = "sum", | |
518 "Density" = "sum", | |
519 "CPUE" = "sum", | |
520 "CPUE.biomass" = "sum", | |
521 "presence_absence" = "pres", | |
522 "abundance.prop.SC" = "w.mean.prop", # Not OK [!!!] ? | |
523 "biomass.prop.SC" = "w.mean.prop.bio", # Not OK [!!!] ? | |
524 ## Benthos : | |
525 "colonies" = "sum", | |
526 "coverage" = "sum", | |
527 "mean.size.colonies" = "w.mean.colonies", | |
528 ## SVR (expérimental) : | |
529 "number.max" = "nbMax", | |
530 "number.sd" = "nbSD", | |
531 "density.max" = "densMax", | |
532 "density.sd" = "densSD", | |
533 "biomass.max" = "sum", | |
534 "spawning.success" = "%.nesting", | |
535 "spawnings" = "sum", | |
536 "readable.tracks" = "sum", | |
537 "tracks.number" = "sum") | |
538 | |
539 ## add "readable.tracks" for egg laying percentage : | |
540 if (any(cas_metric[metrics] == "%.nesting")) { | |
541 if (is.element("size.class", colnames(d_ata))) { | |
542 if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini") | |
543 | |
544 d_ata <- merge(d_ata, | |
545 unit_sp_sz[, c("species.code", "observation.unit", "size.class", "readable.tracks")], | |
546 by = c("species.code", "observation.unit", "size.class"), | |
547 suffixes = c("", ".y")) | |
548 }else{ | |
549 if (is.null(unit_sp)) stop("unit_sp must be defined") | |
550 | |
551 d_ata <- merge(d_ata, | |
552 unit_sp[, c("species.code", "observation.unit", "readable.tracks")], | |
553 by = c("species.code", "observation.unit"), | |
554 suffixes = c("", ".y")) | |
555 } | |
556 } | |
557 | |
558 ## Add "number" field for computing ponderate means if absent : | |
559 if (any(cas_metric[metrics] == "w.mean" | cas_metric[metrics] == "w.mean.prop")) { | |
560 if (is.element("size.class", colnames(d_ata))) { | |
561 if (is.null(unit_sp_sz)) stop("unit_sp_sz must be defined") | |
562 | |
563 d_ata <- merge(d_ata, | |
564 unit_sp_sz[, c("species.code", "observation.unit", "size.class", nb_name)], | |
565 by = c("species.code", "observation.unit", "size.class"), | |
566 suffixes = c("", ".y")) | |
567 | |
568 ## add tot abundance / species / observation unit : | |
569 nb_tot <- tapply(unit_sp_sz[, nb_name], | |
570 as.list(unit_sp_sz[, c("species.code", "observation.unit")]), | |
571 sum, na.rm = TRUE) | |
572 | |
573 d_ata <- merge(d_ata, | |
574 as.data.frame(as.table(nb_tot), responseName = "nombre.tot")) | |
575 }else{ | |
576 if (is.null(unit_sp)) stop("unit_sp must be defined") | |
577 | |
578 d_ata <- merge(d_ata, | |
579 unit_sp[, c("species.code", "observation.unit", nb_name)], # [!!!] unit_sp_sz ? | |
580 by = c("species.code", "observation.unit"), | |
581 suffixes = c("", ".y")) | |
582 } | |
583 } | |
584 | |
585 ## Add biomass field of biomass proportion by size class : | |
586 if (any(cas_metric[metrics] == "w.mean.prop.bio")) { | |
587 if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini") | |
588 | |
589 d_ata <- merge(d_ata, | |
590 unit_sp_sz[, c("species.code", "observation.unit", "size.class", "biomass")], | |
591 by = c("species.code", "observation.unit", "size.class"), | |
592 suffixes = c("", ".y")) | |
593 | |
594 ## add tot biomass / species / observation unit : | |
595 biom_tot <- tapply(unit_sp_sz$biomass, | |
596 as.list(unit_sp_sz[, c("species.code", "observation.unit")]), | |
597 function(x) { | |
598 ifelse(all(is.na(x)), | |
599 NA, | |
600 sum(x, na.rm = TRUE)) | |
601 }) | |
602 | |
603 d_ata <- merge(d_ata, | |
604 as.data.frame(as.table(biom_tot), responseName = "tot.biomass")) | |
605 } | |
606 | |
607 ## add colony field for ponderate means pondérées if absent : | |
608 if (any(cas_metric[metrics] == "w.mean.colonies" & ! is.element("colonies", colnames(d_ata)))) { | |
609 d_ata$colonies <- unit_sp[match(apply(d_ata[, c("species.code", "observation.unit")], | |
610 1, paste, collapse = "*"), | |
611 apply(unit_sp[, c("species.code", "observation.unit")], | |
612 1, paste, collapse = "*")), "colonies"] | |
613 } | |
614 | |
615 | |
616 ## Aggregation of metric depending on factors : | |
617 reslong <- bettercbind(df_list = lapply(metrics, # sapply used to have names | |
618 agregation_f, | |
619 d_ata = d_ata, factors = factors, cas_metric = cas_metric, | |
620 nb_name = nb_name)) | |
621 | |
622 ## Aggregation and add other factors : | |
623 if (! (is.null(list_fact) || length(list_fact) == 0)) { | |
624 reslong <- cbind(reslong, | |
625 sapply(d_ata[, list_fact, drop = FALSE], | |
626 function(fact) { | |
627 tapply(fact, | |
628 as.list(d_ata[, factors, drop = FALSE]), | |
629 function(x) { | |
630 if (length(x) > 1 && length(unique(x)) > 1) { # must be one modality | |
631 return(NULL) # otherwise it is NULL | |
632 }else{ | |
633 unique(as.character(x)) | |
634 } | |
635 }) | |
636 })) | |
637 } | |
638 | |
639 ## If some factors aren't at the right class : | |
640 if (any(tmp <- sapply(reslong[, list_fact, drop = FALSE], class) != sapply(d_ata[, list_fact, drop = FALSE], class))) { | |
641 for (i in which(tmp)) { | |
642 switch(sapply(d_ata[, list_fact, drop = FALSE], class)[i], | |
643 "integer" = { | |
644 reslong[, list_fact[i]] <- as.integer(as.character(reslong[, list_fact[i]])) | |
645 }, | |
646 "numeric" = { | |
647 reslong[, list_fact[i]] <- as.numeric(as.character(reslong[, list_fact[i]])) | |
648 }, | |
649 reslong[, list_fact[i]] <- eval(call(paste("as", sapply(d_ata[, list_fact, drop = FALSE], class)[i], sep = "."), | |
650 reslong[, list_fact[i]])) | |
651 ) | |
652 } | |
653 } | |
654 | |
655 ## Initial order of factors levels : | |
656 reslong <- as.data.frame(sapply(colnames(reslong), | |
657 function(x) { | |
658 if (is.factor(reslong[, x])) { | |
659 return(factor(reslong[, x], levels = levels(d_ata[, x]))) | |
660 }else{ | |
661 return(reslong[, x]) | |
662 } | |
663 }, simplify = FALSE)) | |
664 | |
665 | |
666 ## Check of other aggregated factors supplémentaires. There must be no NULL elements : | |
667 if (any(sapply(reslong[, list_fact], function(x) { | |
668 any(is.null(unlist(x))) | |
669 }))) { | |
670 warning(paste("One of the suppl. factors is probably a subset", | |
671 " of the observations grouping factor(s).", sep = "")) | |
672 return(NULL) | |
673 }else{ | |
674 return(reslong) | |
675 } | |
676 } | |
677 | |
678 ######################################### end of the function agregations_generic_f | |
679 | |
680 ######################################### start of the function drop_levels_f called y calc_biodiv_f in FucntExeCalcCommIndexesGalaxy.r and glm_community in FunctExeCalcGLMGalaxy.r | |
681 drop_levels_f <- function(df, which = NULL) { | |
682 ## Purpose: Suppress unused levels of factors | |
683 ## ---------------------------------------------------------------------- | |
684 ## Arguments: df : a data.frame | |
685 ## which : included columns index (all by default) | |
686 ## ---------------------------------------------------------------------- | |
687 ## Author: Yves Reecht, Date: 10 août 2010, 13:29 modified by Coline ROYAUX 04 june 2020 | |
688 | |
689 if (class(df) != "data.frame") { | |
690 stop("'df' must be a data.frame") | |
691 }else{ | |
692 if (is.null(which)) { | |
693 x <- as.data.frame(sapply(df, function(x) { | |
694 return(x[, drop = TRUE]) | |
695 }, simplify = FALSE), | |
696 stringsAsFactors = FALSE) | |
697 }else{ # Only some columns used | |
698 x <- df | |
699 | |
700 x[, which] <- as.data.frame(sapply(df[, which, drop = FALSE], | |
701 function(x) { | |
702 return(x[, drop = TRUE]) | |
703 }, simplify = FALSE), | |
704 stringsAsFactors = FALSE) | |
705 } | |
706 | |
707 return(x) | |
708 } | |
709 } | |
710 ######################################### end of the function drop_levels_f | |
711 | |
712 ######################################### start of the function subset_all_tables_f called by glm_community in FunctExeCalcGLMGalaxy.r | |
713 | |
714 subset_all_tables_f <- function(metrique, tab_metrics, facteurs, selections, | |
715 tab_unitobs, refesp, tab_metrique = "", nb_name = "number", obs_type = "", | |
716 exclude = NULL, add = c("species.code", "observation.unit")) { | |
717 ## Purpose: Extract useful data only from chosen metrics and factors | |
718 ## ---------------------------------------------------------------------- | |
719 ## Arguments: metrique : chosen metric | |
720 ## facteurs : all chosen factors | |
721 ## selections : corresponding modality selected | |
722 ## tab_metrique : metrics table name | |
723 ## exclude : factors levels to exclude | |
724 ## add : field to add to data table | |
725 ## ---------------------------------------------------------------------- | |
726 ## Author: Yves Reecht, Date: 6 août 2010, 16:46 modified by Coline ROYAUX 04 june 2020 | |
727 | |
728 ## If no metrics table available : | |
729 if (is.element(tab_metrique, c("", "TableOccurrences", "TablePresAbs"))) { | |
730 tab_metrique <- "unit_sp" | |
731 } | |
732 | |
733 cas_tables <- c("unit_sp" = "unit_sp", | |
734 "TablePresAbs" = "unit_sp", | |
735 "unit_sp_sz" = "unit_sp_sz") | |
736 | |
737 ## Recuperation of metrics table : | |
738 data_metric <- tab_metrics | |
739 unitobs <- tab_unitobs | |
740 refesp <- refesp | |
741 | |
742 ## If no metrics available or already computed : | |
743 if (is.element(metrique, c("", "occurrence.frequency"))) { | |
744 metrique <- "tmp" | |
745 data_metric$tmp <- 0 | |
746 data_metric$tmp[data_metric[, nb_name] > 0] <- 1 | |
747 } | |
748 | |
749 if (!is.null(add)) { | |
750 metriques <- c(metrique, add[is.element(add, colnames(data_metric))]) | |
751 }else{ | |
752 metriques <- metrique | |
753 } | |
754 | |
755 ## Subset depending on metrics table | |
756 switch(cas_tables[tab_metrique], | |
757 ## Observation table by unitobs and species : | |
758 unit_sp = { | |
759 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), metriques, drop = FALSE], | |
760 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])], | |
761 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs | |
762 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE], | |
763 refesp[match(data_metric$species.code[!is.na(data_metric[, metrique])], | |
764 refesp$species.code), # ajout des colonnes sélectionnées d'especes | |
765 facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE]) | |
766 }, | |
767 ## Observation table by unitobs, species and size class : | |
768 unit_sp_sz = { | |
769 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), | |
770 c(metriques, "size.class"), drop = FALSE], | |
771 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])], | |
772 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs | |
773 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE], | |
774 refesp[match(data_metric$species.code[!is.na(data_metric[, metrique])], | |
775 refesp$species.code), # ajout des colonnes sélectionnées d'especes | |
776 facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE]) | |
777 }, | |
778 ## Other cases : | |
779 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), metriques, drop = FALSE], | |
780 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])], | |
781 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs. | |
782 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE]) | |
783 ) | |
784 | |
785 sel_col <- which(!is.na(selections)) | |
786 if (!is.null(exclude)) { | |
787 sel_col <- sel_col[sel_col != exclude] | |
788 } | |
789 | |
790 ## Particular case of size classes : | |
791 if (is.element("size.class", colnames(restmp))) { | |
792 if (length(grep("^[[:digit:]]*[-_][[:digit:]]*$", unique(as.character(restmp$size.class)), perl = TRUE)) == | |
793 length(unique(as.character(restmp$size.class)))) { | |
794 restmp[, "size.class"] <- | |
795 factor(as.character(restmp$size.class), | |
796 levels = unique(as.character(restmp$size.class))[ | |
797 order(as.numeric(sub("^([[:digit:]]*)[-_][[:digit:]]*$", | |
798 "\\1", | |
799 unique(as.character(restmp$size.class)), | |
800 perl = TRUE)), | |
801 na.last = FALSE)]) | |
802 }else{ | |
803 restmp[, "size.class"] <- factor(restmp$size.class) | |
804 } | |
805 } | |
806 | |
807 ## Biomass and density conversion -> /100m² : | |
808 if (any(is.element(colnames(restmp), c("biomass", "density", | |
809 "biomass.max", "density.max", | |
810 "biomass.sd", "density.sd"))) && obs_type != "fishing") { | |
811 restmp[, is.element(colnames(restmp), | |
812 c("biomass", "density", | |
813 "biomass.max", "density.max", | |
814 "biomass.sd", "density.sd"))] <- 100 * | |
815 restmp[, is.element(colnames(restmp), | |
816 c("biomass", "density", | |
817 "biomass.max", "density.max", | |
818 "biomass.sd", "density.sd"))] | |
819 } | |
820 | |
821 return(restmp) | |
822 } | |
823 | |
824 ######################################### end of the function subset_all_tables_f | |
825 | |
826 ######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r | |
827 | |
828 organise_fact <- function(list_rand, list_fact) { | |
829 ## Purpose: organise response factors | |
830 ## ---------------------------------------------------------------------- | |
831 ## Arguments: list_rand : Analysis random factors list | |
832 ## list_fact : Analysis factors list | |
833 ## ---------------------------------------------------------------------- | |
834 ## Author: Coline ROYAUX 14 november 2020 | |
835 | |
836 if (list_rand[1] != "None") { | |
837 if (all(is.element(list_fact, list_rand)) || list_fact[1] == "None") { | |
838 resp_fact <- paste("(1|", paste(list_rand, collapse = ") + (1|"), ")") | |
839 list_f <- NULL | |
840 list_fact <- list_rand | |
841 }else{ | |
842 list_f <- list_fact[!is.element(list_fact, list_rand)] | |
843 resp_fact <- paste(paste(list_f, collapse = " + "), " + (1|", paste(list_rand, collapse = ") + (1|"), ")") | |
844 list_fact <- c(list_f, list_rand) | |
845 } | |
846 }else{ | |
847 list_f <- list_fact | |
848 resp_fact <- paste(list_fact, collapse = " + ") | |
849 } | |
850 return(list(resp_fact, list_f, list_fact)) | |
851 } | |
852 | |
853 ######################################### end of the function organise_fact | |
854 | |
855 ######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r | |
856 distrib_choice <- function(distrib = distrib, metrique = metrique, data = tmpd_ata) { | |
857 ## Purpose: choose the right distribution | |
858 ## ---------------------------------------------------------------------- | |
859 ## Arguments: data : data used for analysis | |
860 ## metrique : Chosen metric | |
861 ## distrib : distribution law selected by user | |
862 ## ---------------------------------------------------------------------- | |
863 ## Author: Coline ROYAUX 14 november 2020 | |
864 | |
865 if (distrib == "None") { | |
866 if (metrique == "presence_absence") { | |
867 chose_distrib <- "binomial" | |
868 }else{ | |
869 switch(class(data[, metrique]), | |
870 "integer" = { | |
871 chose_distrib <- "poisson" | |
872 }, | |
873 "numeric" = { | |
874 chose_distrib <- "gaussian" | |
875 }, | |
876 stop("Selected metric class doesn't fit, you should select an integer or a numeric variable")) | |
877 } | |
878 }else{ | |
879 chose_distrib <- distrib | |
880 } | |
881 return(chose_distrib) | |
882 } | |
883 | |
884 ######################################### end of the function organise_fact | |
885 | |
886 ######################################### start of the function create_res_table called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r | |
887 create_res_table <- function(list_rand, list_fact, row, lev, distrib) { | |
888 ## Purpose: create results table | |
889 ## ---------------------------------------------------------------------- | |
890 ## Arguments: list_rand : Analysis random factors list | |
891 ## list_fact : Analysis factors list | |
892 ## row : rows of results table = species or separation factor | |
893 ## lev : Levels of analysis factors list | |
894 ## distrib : distribution law | |
895 ## ---------------------------------------------------------------------- | |
896 ## Author: Coline ROYAUX 04 october 2020 | |
897 | |
898 if (list_rand[1] != "None") { ## if random effects | |
899 tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, BIC = NA, logLik = NA, deviance = NA, df.resid = NA) | |
900 colrand <- unlist(lapply(list_rand, | |
901 FUN = function(x) { | |
902 lapply(c("Std.Dev", "NbObservation", "NbLevels"), | |
903 FUN = function(y) { | |
904 paste(x, y, collapse = ":") | |
905 }) | |
906 })) | |
907 tab_sum[, colrand] <- NA | |
908 | |
909 if (! is.null(lev)) { ## if fixed effects + random effects | |
910 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
911 FUN = function(x) { | |
912 lapply(c("Estimate", "Std.Err", "Zvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
913 FUN = function(y) { | |
914 paste(x, y, collapse = ":") | |
915 }) | |
916 })) | |
917 | |
918 }else{ ## if no fixed effects | |
919 colcoef <- NULL | |
920 } | |
921 | |
922 }else{ ## if no random effects | |
923 tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, Resid.deviance = NA, df.resid = NA, Null.deviance = NA, df.null = NA) | |
924 | |
925 switch(distrib, | |
926 "gaussian" = { | |
927 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
928 FUN = function(x) { | |
929 lapply(c("Estimate", "Std.Err", "Tvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
930 FUN = function(y) { | |
931 paste(x, y, collapse = ":") | |
932 }) | |
933 })) | |
934 | |
935 }, | |
936 "quasipoisson" = { | |
937 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
938 FUN = function(x) { | |
939 lapply(c("Estimate", "Std.Err", "Tvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
940 FUN = function(y) { | |
941 paste(x, y, collapse = ":") | |
942 }) | |
943 })) | |
944 | |
945 } | |
946 , { | |
947 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
948 FUN = function(x) { | |
949 lapply(c("Estimate", "Std.Err", "Zvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
950 FUN = function(y) { | |
951 paste(x, y, collapse = ":") | |
952 }) | |
953 })) | |
954 }) | |
955 | |
956 } | |
957 | |
958 tab_sum[, colcoef] <- NA | |
959 | |
960 | |
961 return(tab_sum) | |
962 } | |
963 ######################################### end of the function create_res_table | |
964 | |
965 ######################################### start of the function sorties_lm_f called by glm_community in FunctExeCalcGLMGalaxy.r | |
966 sorties_lm_f <- function(obj_lm, obj_lmy, tab_sum, #formule, | |
967 metrique, fact_ana, cut, col_ana, list_fact, list_rand, lev = NULL, d_ata, | |
968 log = FALSE, sufixe = NULL) { | |
969 ## Purpose: Form GLM and LM results | |
970 ## ---------------------------------------------------------------------- | |
971 ## Arguments: obj_lm : lm object | |
972 ## obj_lmy : lm object with year as continuous | |
973 ## tab_sum : output summary table | |
974 ## formule : LM formula | |
975 ## metrique : Chosen metric | |
976 ## fact_ana : separation factor | |
977 ## cut : level of separation factor | |
978 ## col_ana : colname for separation factor in output summary table | |
979 ## list_fact : Analysis factors list | |
980 ## list_rand : Analysis random factors list | |
981 ## levels : Levels of analysis factors list | |
982 ## d_ata : d_ata used for analysis | |
983 ## log : put log on metric ? (boolean) | |
984 ## sufixe : sufix for file name | |
985 ## ---------------------------------------------------------------------- | |
986 ## Author: Yves Reecht, Date: 25 août 2010, 16:19 modified by Coline ROYAUX 04 june 2020 | |
987 | |
988 tab_sum[, "Interest.var"] <- as.character(metrique) | |
989 sum_lm <- summary(obj_lm) | |
990 tab_sum[, "distribution"] <- as.character(sum_lm$family[1]) | |
991 | |
992 if (length(grep("^glmmTMB", obj_lm$call)) > 0) { #if random effects | |
993 tab_sum[tab_sum[, col_ana] == cut, "AIC"] <- sum_lm$AICtab[1] | |
994 tab_sum[tab_sum[, col_ana] == cut, "BIC"] <- sum_lm$AICtab[2] | |
995 tab_sum[tab_sum[, col_ana] == cut, "logLik"] <- sum_lm$AICtab[3] | |
996 tab_sum[tab_sum[, col_ana] == cut, "deviance"] <- sum_lm$AICtab[4] | |
997 tab_sum[tab_sum[, col_ana] == cut, "df.resid"] <- sum_lm$AICtab[5] | |
998 | |
999 if (! is.null(lev)) { ## if fixed effects + random effects | |
1000 tab_coef <- as.data.frame(sum_lm$coefficients$cond) | |
1001 tab_coef$signif <- lapply(tab_coef[, "Pr(>|z|)"], FUN = function(x) { | |
1002 if (!is.na(x) && x < 0.05) { | |
1003 "yes" | |
1004 }else{ | |
1005 "no" | |
1006 } | |
1007 }) | |
1008 | |
1009 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Zvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "z value"] | |
1010 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|z|)"] | |
1011 | |
1012 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Zvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1013 if (length(grep(x, rownames(tab_coef))) > 0) { | |
1014 tab_coef[grepl(x, rownames(tab_coef)), "z value"] | |
1015 }else{ | |
1016 NA | |
1017 } | |
1018 })) | |
1019 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1020 if (length(grep(x, rownames(tab_coef))) > 0) { | |
1021 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|z|)"] | |
1022 }else{ | |
1023 NA | |
1024 } | |
1025 })) | |
1026 | |
1027 if (any(obj_lmy != "")) { | |
1028 sum_lmy <- summary(obj_lmy) | |
1029 tab_coefy <- as.data.frame(sum_lmy$coefficients$cond) | |
1030 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|z|)"], FUN = function(x) { | |
1031 if (!is.na(x) && x < 0.05) { | |
1032 "yes" | |
1033 }else{ | |
1034 "no" | |
1035 } | |
1036 }) | |
1037 tab_sum[tab_sum[, col_ana] == cut, "year Zvalue"] <- ifelse(length(tab_coefy["year", "z value"]) > 0, tab_coefy["year", "z value"], NA) | |
1038 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|z|)"], NA) | |
1039 } | |
1040 | |
1041 } | |
1042 | |
1043 switch(as.character(length(sum_lm$varcor$cond)), | |
1044 "1" = { | |
1045 std_d <- c(sum_lm$varcor$cond[[1]]) | |
1046 }, | |
1047 "2" = { | |
1048 std_d <- c(sum_lm$varcor$cond[[1]], sum_lm$varcor$cond[[2]]) | |
1049 }, | |
1050 std_d <- NULL) | |
1051 | |
1052 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "Std.Dev", collapse = "|"), colnames(tab_sum))] <- std_d | |
1053 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "NbObservation", collapse = "|"), colnames(tab_sum))] <- sum_lm$nobs | |
1054 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "NbLevels", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(list_rand, FUN = function(x) { | |
1055 nlevels(d_ata[, x]) | |
1056 })) | |
1057 | |
1058 }else{ ## if fixed effects only | |
1059 | |
1060 tab_sum[tab_sum[, col_ana] == cut, "AIC"] <- sum_lm$aic | |
1061 tab_sum[tab_sum[, col_ana] == cut, "Resid.deviance"] <- sum_lm$deviance | |
1062 tab_sum[tab_sum[, col_ana] == cut, "df.resid"] <- sum_lm$df.residual | |
1063 tab_sum[tab_sum[, col_ana] == cut, "Null.deviance"] <- sum_lm$null.deviance | |
1064 tab_sum[tab_sum[, col_ana] == cut, "df.null"] <- sum_lm$df.null | |
1065 tab_coef <- as.data.frame(sum_lm$coefficients) | |
1066 | |
1067 if (any(obj_lmy != "")) { | |
1068 sum_lmy <- summary(obj_lmy) | |
1069 tab_coefy <- as.data.frame(sum_lmy$coefficients) | |
1070 } | |
1071 | |
1072 if (sum_lm$family[1] == "gaussian" || sum_lm$family[1] == "quasipoisson") { | |
1073 | |
1074 tab_coef$signif <- lapply(tab_coef[, "Pr(>|t|)"], FUN = function(x) { | |
1075 if (!is.na(x) && x < 0.05) { | |
1076 "yes" | |
1077 }else{ | |
1078 "no" | |
1079 } | |
1080 }) | |
1081 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Tvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "t value"] | |
1082 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|t|)"] | |
1083 | |
1084 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Tvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1085 if (length(grep(x, rownames(tab_coef))) > 0) { | |
1086 tab_coef[grepl(x, rownames(tab_coef)), "t value"] | |
1087 }else{ | |
1088 NA | |
1089 } | |
1090 })) | |
1091 | |
1092 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1093 if (length(grep(x, rownames(tab_coef))) > 0) { | |
1094 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|t|)"] | |
1095 }else{ | |
1096 NA | |
1097 } | |
1098 })) | |
1099 | |
1100 if (any(obj_lmy != "")) { | |
1101 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|t|)"], FUN = function(x) { | |
1102 if (!is.na(x) && x < 0.05) { | |
1103 "yes" | |
1104 }else{ | |
1105 "no" | |
1106 } | |
1107 }) | |
1108 tab_sum[tab_sum[, col_ana] == cut, "year Tvalue"] <- ifelse(length(tab_coefy["year", "t value"]) > 0, tab_coefy["year", "t value"], NA) | |
1109 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|t|)"], NA) | |
1110 } | |
1111 | |
1112 }else{ | |
1113 tab_coef$signif <- lapply(tab_coef[, "Pr(>|z|)"], FUN = function(x) { | |
1114 if (!is.na(x) && x < 0.05) { | |
1115 "yes" | |
1116 }else{ | |
1117 "no" | |
1118 } | |
1119 }) | |
1120 | |
1121 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Zvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "z value"] | |
1122 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|z|)"] | |
1123 | |
1124 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Zvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1125 if (length(grep(x, rownames(tab_coef))) > 0) { | |
1126 tab_coef[grepl(x, rownames(tab_coef)), "z value"] | |
1127 }else{ | |
1128 NA | |
1129 } | |
1130 })) | |
1131 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1132 if (length(grep(x, rownames(tab_coef))) > 0) { | |
1133 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|z|)"] | |
1134 }else{ | |
1135 NA | |
1136 } | |
1137 })) | |
1138 | |
1139 if (any(obj_lmy != "")) { | |
1140 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|z|)"], FUN = function(x) { | |
1141 if (!is.na(x) && x < 0.05) { | |
1142 "yes" | |
1143 }else{ | |
1144 "no" | |
1145 } | |
1146 }) | |
1147 | |
1148 tab_sum[tab_sum[, col_ana] == cut, "year Zvalue"] <- ifelse(length(tab_coefy["year", "z value"]) > 0, tab_coefy["year", "z value"], NA) | |
1149 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|z|)"], NA) | |
1150 } | |
1151 } | |
1152 } | |
1153 | |
1154 if (! is.null(lev)) { ## if fixed effects | |
1155 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Estimate", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Estimate"] | |
1156 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Std.Err", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Std. Error"] | |
1157 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*signif", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "signif"] | |
1158 | |
1159 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Estimate", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1160 if (length(grep(x, rownames(tab_coef))) > 0) { | |
1161 tab_coef[grepl(x, rownames(tab_coef)), "Estimate"] | |
1162 }else{ | |
1163 NA | |
1164 } | |
1165 })) | |
1166 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Std.Err", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1167 if (length(grep(x, rownames(tab_coef))) > 0) { | |
1168 tab_coef[grepl(x, rownames(tab_coef)), "Std. Error"] | |
1169 }else{ | |
1170 NA | |
1171 } | |
1172 })) | |
1173 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "signif", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1174 if (length(grep(x, rownames(tab_coef))) > 0) { | |
1175 tab_coef[grepl(x, rownames(tab_coef)), "signif"] | |
1176 }else{ | |
1177 NA | |
1178 } | |
1179 })) | |
1180 | |
1181 if (any(obj_lmy != "")) { | |
1182 tab_sum[tab_sum[, col_ana] == cut, "year Estimate"] <- ifelse(length(tab_coefy["year", "Estimate"]) > 0, tab_coefy["year", "Estimate"], NA) | |
1183 tab_sum[tab_sum[, col_ana] == cut, "year Std.Err"] <- ifelse(length(tab_coefy["year", "Std. Error"]) > 0, tab_coefy["year", "Std. Error"], NA) | |
1184 tab_sum[tab_sum[, col_ana] == cut, "year signif"] <- ifelse(length(tab_coefy["year", "signif"]) > 0, tab_coefy["year", "signif"], NA) | |
1185 } | |
1186 | |
1187 } | |
1188 | |
1189 ic <- tryCatch(as.data.frame(confint(obj_lm)), error = function(e) { | |
1190 }) | |
1191 | |
1192 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "IC_up", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1193 if (length(grep(x, rownames(ic))) > 0) { | |
1194 ic[grepl(x, rownames(ic)), "97.5 %"] | |
1195 }else{ | |
1196 NA | |
1197 } | |
1198 })) | |
1199 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "IC_inf", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1200 if (length(grep(x, rownames(ic))) > 0) { | |
1201 ic[grepl(x, rownames(ic)), "2.5 %"] | |
1202 }else{ | |
1203 NA | |
1204 } | |
1205 })) | |
1206 | |
1207 return(tab_sum) | |
1208 | |
1209 } | |
1210 | |
1211 | |
1212 ######################################### end of the function sorties_lm_f | |
1213 | |
1214 | |
1215 ######################################### start of the function note_glm_f called by glm_species and glm_community | |
1216 | |
1217 note_glm_f <- function(data, obj_lm, metric, list_fact, details = FALSE) { | |
1218 ## Purpose: Note your GLM analysis | |
1219 ## ---------------------------------------------------------------------- | |
1220 ## Arguments: data : d_ataframe used for analysis | |
1221 ## obj_lm : GLM assessed | |
1222 ## metric : selected metric | |
1223 ## list_fact : Analysis factors list | |
1224 ## details : detailed output ? | |
1225 ## ---------------------------------------------------------------------- | |
1226 ## Author: Coline ROYAUX, 26 june 2020 | |
1227 | |
1228 rate <- 0 | |
1229 detres <- list(complete_plan = NA, balanced_plan = NA, NA_proportion_OK = NA, no_residual_dispersion = NA, uniform_residuals = NA, outliers_proportion_OK = NA, no_zero_inflation = NA, observation_factor_ratio_OK = NA, enough_levels_random_effect = NA, rate = NA) | |
1230 | |
1231 #### d_ata criterions #### | |
1232 | |
1233 ## Plan | |
1234 | |
1235 plan <- as.data.frame(table(data[, list_fact])) | |
1236 | |
1237 if (nrow(plan[plan$Freq == 0, ]) < nrow(plan) * 0.1) { # +0.5 if less than 10% of possible factor's level combinations aren't represented in the sampling scheme | |
1238 rate <- rate + 0.5 | |
1239 detres$complete_plan <- TRUE | |
1240 | |
1241 if (summary(as.factor(plan$Freq))[1] > nrow(plan) * 0.9) { # +0.5 if the frequency of the most represented frequency of possible factor's levels combinations is superior to 90% of the total number of possible factor's levels combinations | |
1242 rate <- rate + 0.5 | |
1243 detres$balanced_plan <- TRUE | |
1244 } | |
1245 | |
1246 }else{ | |
1247 detres$complete_plan <- FALSE | |
1248 detres$balanced_plan <- FALSE | |
1249 } | |
1250 | |
1251 if (nrow(data) - nrow(na.omit(data)) < nrow(data) * 0.1) { # +1 if less than 10% of the lines in the dataframe bares a NA | |
1252 rate <- rate + 1 | |
1253 detres["NA_proportion_OK"] <- TRUE | |
1254 }else{ | |
1255 detres["NA_proportion_OK"] <- FALSE | |
1256 } | |
1257 | |
1258 #### Model criterions #### | |
1259 | |
1260 if (length(grep("quasi", obj_lm$family)) == 0) { #DHARMa doesn't work with quasi distributions | |
1261 | |
1262 residuals <- DHARMa::simulateResiduals(obj_lm) | |
1263 | |
1264 capture.output(test_res <- DHARMa::testResiduals(residuals)) | |
1265 test_zero <- DHARMa::testZeroInflation(residuals) | |
1266 | |
1267 ## dispersion of residuals | |
1268 | |
1269 if (test_res$dispersion$p.value > 0.05) { # +1.5 if dispersion tests not significative | |
1270 rate <- rate + 1.5 | |
1271 detres$no_residual_dispersion <- TRUE | |
1272 }else{ | |
1273 detres$no_residual_dispersion <- FALSE | |
1274 } | |
1275 | |
1276 ## uniformity of residuals | |
1277 | |
1278 if (test_res$uniformity$p.value > 0.05) { # +1 if uniformity tests not significative | |
1279 rate <- rate + 1 | |
1280 detres$uniform_residuals <- TRUE | |
1281 }else{ | |
1282 detres$uniform_residuals <- FALSE | |
1283 } | |
1284 | |
1285 ## residuals outliers | |
1286 | |
1287 if (test_res$outliers$p.value > 0.05) { # +0.5 if outliers tests not significative | |
1288 rate <- rate + 0.5 | |
1289 detres["outliers_proportion_OK"] <- TRUE | |
1290 }else{ | |
1291 detres["outliers_proportion_OK"] <- FALSE | |
1292 } | |
1293 | |
1294 ## Zero inflation test | |
1295 | |
1296 if (test_zero$p.value > 0.05) { # +1 if zero inflation tests not significative | |
1297 rate <- rate + 1 | |
1298 detres$no_zero_inflation <- TRUE | |
1299 }else{ | |
1300 detres$no_zero_inflation <- FALSE | |
1301 } | |
1302 | |
1303 ## Factors/observations ratio | |
1304 | |
1305 if (length(list_fact) / nrow(na.omit(data)) < 0.1) { # +1 if quantity of factors is less than 10% of the quantity of observations | |
1306 rate <- rate + 1 | |
1307 detres["observation_factor_ratio_OK"] <- TRUE | |
1308 }else{ | |
1309 detres["observation_factor_ratio_OK"] <- FALSE | |
1310 } | |
1311 | |
1312 ## less than 10 factors' level on random effect | |
1313 | |
1314 if (length(grep("^glmmTMB", obj_lm$call)) > 0) { | |
1315 nlev_rand <- c() | |
1316 for (fact in names(summary(obj_lm)$varcor$cond)) { | |
1317 nlev_rand <- c(nlev_rand, length(unlist(unique(data[, fact])))) | |
1318 } | |
1319 | |
1320 if (all(nlev_rand > 10)) { # +1 if more than 10 levels in one random effect | |
1321 rate <- rate + 1 | |
1322 detres$enough_levels_random_effect <- TRUE | |
1323 }else{ | |
1324 detres$enough_levels_random_effect <- FALSE | |
1325 } | |
1326 } | |
1327 | |
1328 detres$rate <- rate | |
1329 | |
1330 if (details) { | |
1331 return(detres) | |
1332 }else{ | |
1333 return(rate) | |
1334 } | |
1335 | |
1336 }else{ | |
1337 return(NA) | |
1338 cat("Models with quasi distributions can't be rated for now") | |
1339 } | |
1340 } | |
1341 | |
1342 ######################################### end of the function note_glm_f | |
1343 | |
1344 ######################################### start of the function note_glms_f called by glm_species and glm_community | |
1345 | |
1346 note_glms_f <- function(tab_rate, expr_lm, obj_lm, file_out = FALSE) { | |
1347 ## Purpose: Note your GLM analysis | |
1348 ## ---------------------------------------------------------------------- | |
1349 ## Arguments: tab_rate : rates table from note_glm_f | |
1350 ## expr_lm : GLM expression assessed | |
1351 ## obj_lm : GLM object | |
1352 ## file_out : Output as file ? else global rate only | |
1353 ## ---------------------------------------------------------------------- | |
1354 ## Author: Coline ROYAUX, 26 june 2020 | |
1355 namefile <- "RatingGLM.txt" | |
1356 | |
1357 if (length(grep("quasi", obj_lm$family)) == 0) { #DHARMa doesn't work with quasi distributions | |
1358 | |
1359 rate_m <- median(na.omit(tab_rate[, "rate"])) | |
1360 sum <- summary(obj_lm) | |
1361 | |
1362 if (length(grep("^glmmTMB", obj_lm$call)) > 0) { | |
1363 if (median(na.omit(tab_rate[, "rate"])) >= 6) { # if 50% has a rate superior or equal to 6 +1 | |
1364 rate_m <- rate_m + 1 | |
1365 } | |
1366 | |
1367 if (quantile(na.omit(tab_rate[, "rate"]), probs = 0.9) >= 6) { # if 90% has a rate superior or equal to 6 +1 | |
1368 rate_m <- rate_m + 1 | |
1369 } | |
1370 }else{ | |
1371 if (median(na.omit(tab_rate[, "rate"])) >= 5) { # if 50% has a rate superior or equal to 5 +1 | |
1372 rate_m <- rate_m + 1 | |
1373 } | |
1374 | |
1375 if (quantile(na.omit(tab_rate[, "rate"]), probs = 0.9) >= 5) { # if 90% has a rate superior or equal to 5 +1 | |
1376 rate_m <- rate_m + 1 | |
1377 } | |
1378 } | |
1379 | |
1380 if (file_out) { | |
1381 | |
1382 cat("###########################################################################", | |
1383 "\n########################### Analysis evaluation ###########################", | |
1384 "\n###########################################################################", file = namefile, fill = 1, append = TRUE) | |
1385 | |
1386 ## Informations on model : | |
1387 cat("\n\n######################################### \nFitted model:", file = namefile, fill = 1, append = TRUE) | |
1388 cat("\t", deparse(expr_lm), "\n\n", file = namefile, sep = "", append = TRUE) | |
1389 cat("Family: ", sum$family[[1]], | |
1390 file = namefile, append = TRUE) | |
1391 cat("\n\nNumber of analysis: ", nrow(tab_rate), file = namefile, append = TRUE) | |
1392 | |
1393 ## Global rate : | |
1394 cat("\n\n######################################### \nGlobal rate for all analysis:", | |
1395 "\n\n", rate_m, "out of 10", file = namefile, append = TRUE) | |
1396 | |
1397 ## details on every GLM : | |
1398 | |
1399 cat("\n\n######################################### \nDetails on every analysis:\n\n", file = namefile, append = TRUE) | |
1400 cat("Analysis\tC1\tC2\tC3\tC4\tC5\tC6\tC7\tC8\tC9\tFinal rate", file = namefile, append = TRUE) | |
1401 apply(tab_rate, 1, FUN = function(x) { | |
1402 | |
1403 if (!is.na(x["complete_plan"]) && x["complete_plan"] == TRUE) { | |
1404 cat("\n", x[1], "\tyes", file = namefile, append = TRUE) | |
1405 }else{ | |
1406 cat("\n", x[1], "\tno", file = namefile, append = TRUE) | |
1407 } | |
1408 | |
1409 for (i in c("balanced_plan", "NA_proportion_OK", "no_residual_dispersion", "uniform_residuals", "outliers_proportion_OK", "no_zero_inflation", "observation_factor_ratio_OK", "enough_levels_random_effect")) { | |
1410 if (!is.na(x[i]) && x[i] == TRUE) { | |
1411 cat("\tyes", file = namefile, append = TRUE) | |
1412 }else{ | |
1413 cat("\tno", file = namefile, append = TRUE) | |
1414 } | |
1415 } | |
1416 | |
1417 cat("\t", x["rate"], "/ 8", file = namefile, append = TRUE) | |
1418 | |
1419 | |
1420 }) | |
1421 cat("\n\nC1: Complete plan?\nC2: Balanced plan?\nC3: Few NA?\nC4: Regular dispersion?\nC5: Uniform residuals?\nC6: Regular outliers proportion?\nC7: No zero-inflation?\nC8: Good observation/factor ratio?\nC9: Enough levels on random effect?", file = namefile, append = TRUE) | |
1422 | |
1423 ## Red flags - advice : | |
1424 cat("\n\n######################################### \nRed flags - advice:\n\n", file = namefile, append = TRUE) | |
1425 if (all(na.omit(tab_rate["NA_proportion_OK"]) == FALSE)) { | |
1426 cat("\n", "\t- More than 10% of lines of your dataset contains NAs", file = namefile, append = TRUE) | |
1427 } | |
1428 | |
1429 if (length(grep("FALSE", tab_rate["no_residual_dispersion"])) / length(na.omit(tab_rate["no_residual_dispersion"])) > 0.5) { | |
1430 cat("\n", "\t- More than 50% of your analyses are over- or under- dispersed : Try with another distribution family", file = namefile, append = TRUE) | |
1431 } | |
1432 | |
1433 if (length(grep("FALSE", tab_rate["uniform_residuals"])) / length(na.omit(tab_rate["uniform_residuals"])) > 0.5) { | |
1434 cat("\n", "\t- More than 50% of your analyses haven't an uniform distribution of residuals : Try with another distribution family", file = namefile, append = TRUE) | |
1435 } | |
1436 | |
1437 if (length(grep("FALSE", tab_rate["outliers_proportion_OK"])) / length(na.omit(tab_rate["outliers_proportion_OK"])) > 0.5) { | |
1438 cat("\n", "\t- More than 50% of your analyses have too much outliers : Try with another distribution family or try to select or filter your data", file = namefile, append = TRUE) | |
1439 } | |
1440 | |
1441 if (length(grep("FALSE", tab_rate["no_zero_inflation"])) / length(na.omit(tab_rate["no_zero_inflation"])) > 0.5) { | |
1442 cat("\n", "\t- More than 50% of your analyses have zero inflation : Try to select or filter your data", file = namefile, append = TRUE) | |
1443 } | |
1444 | |
1445 if (length(grep("FALSE", tab_rate["observation_factor_ratio_OK"])) / length(na.omit(tab_rate["observation_factor_ratio_OK"])) > 0.5) { | |
1446 cat("\n", "\t- More than 50% of your analyses have not enough observations for the amount of factors : Try to use less factors in your analysis or try to use another separation factor", file = namefile, append = TRUE) | |
1447 } | |
1448 | |
1449 if (any(tab_rate["enough_levels_random_effect"] == FALSE, na.rm = TRUE) && length(grep("^glmmTMB", obj_lm$call)) > 0) { | |
1450 cat("\n", "\t- Random effect hasn't enough levels to be robust : If it has less than ten levels remove the random effect", file = namefile, append = TRUE) | |
1451 } | |
1452 }else{ | |
1453 | |
1454 return(rate_m) | |
1455 | |
1456 } | |
1457 }else{ | |
1458 cat("Models with quasi distributions can't be rated for now", file = namefile, append = TRUE) | |
1459 } | |
1460 } | |
1461 | |
1462 ######################################### end of the function note_glm_f | |
1463 | |
1464 ######################################### start of the function info_stats_f called by glm_species and glm_community | |
1465 | |
1466 info_stats_f <- function(filename, d_ata, agreg_level = c("species", "unitobs"), type = c("graph", "stat"), | |
1467 metrique, fact_graph, fact_graph_sel, list_fact, list_fact_sel) { | |
1468 ## Purpose: informations and simple statistics | |
1469 ## ---------------------------------------------------------------------- | |
1470 ## Arguments: filename : name of file | |
1471 ## d_ata : input data | |
1472 ## agreg_level : aggregation level | |
1473 ## type : type of function calling | |
1474 ## metrique : selected metric | |
1475 ## fact_graph : selection factor | |
1476 ## fact_graph_sel : list of factors levels selected for this factor | |
1477 ## list_fact : list of grouping factors | |
1478 ## list_fact_sel : list of factors levels selected for these factors | |
1479 ## ---------------------------------------------------------------------- | |
1480 ## Author: Yves Reecht, Date: 10 sept. 2012, 15:26 modified by Coline ROYAUX 04 june 2020 | |
1481 | |
1482 ## Open file : | |
1483 f_ile <- file(description = filename, | |
1484 open = "w", encoding = "latin1") | |
1485 | |
1486 ## if error : | |
1487 on.exit(if (exists("filename") && | |
1488 tryCatch(isOpen(f_ile), | |
1489 error = function(e)return(FALSE))) close(f_ile)) | |
1490 | |
1491 ## Metrics and factors infos : | |
1492 print_selection_info_f(metrique = metrique, #fact_graph = fact_graph, fact_graph_sel = fact_graph_sel, | |
1493 list_fact = list_fact, #list_fact_sel = list_fact_sel, | |
1494 f_ile = f_ile, | |
1495 agreg_level = agreg_level, type = type) | |
1496 | |
1497 ## statistics : | |
1498 if (class(d_ata) == "list") { | |
1499 cat("\n###################################################", | |
1500 "\nStatistics per level of splitting factor:\n", | |
1501 sep = "", file = f_ile, append = TRUE) | |
1502 | |
1503 invisible(sapply(seq_len(length(d_ata)), | |
1504 function(i) { | |
1505 print_stats_f(d_ata = d_ata[[i]], metrique = metrique, list_fact = list_fact, f_ile = f_ile, | |
1506 headline = fact_graph_sel[i]) | |
1507 })) | |
1508 }else{ | |
1509 print_stats_f(d_ata = d_ata, metrique = metrique, list_fact = list_fact, f_ile = f_ile, | |
1510 headline = NULL) | |
1511 } | |
1512 | |
1513 ## Close file : | |
1514 close(f_ile) | |
1515 | |
1516 } | |
1517 | |
1518 ######################################### end of the function info_stats_f | |
1519 | |
1520 | |
1521 ######################################### start of the function print_selection_info_f called by info_stats_f | |
1522 | |
1523 print_selection_info_f <- function(metrique, list_fact, | |
1524 f_ile, | |
1525 agreg_level = c("species", "unitobs"), type = c("graph", "stat")) { | |
1526 ## Purpose: Write data informations | |
1527 ## ---------------------------------------------------------------------- | |
1528 ## Arguments: metrique : chosen metric | |
1529 ## list_fact : factor's list | |
1530 ## f_ile : Results file name | |
1531 ## agreg_level : aggregation level | |
1532 ## type : function type | |
1533 ## ---------------------------------------------------------------------- | |
1534 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:41 modified by Coline ROYAUX 04 june 2020 | |
1535 | |
1536 cat("\n##################################################\n", | |
1537 "Metrics and factors (and possible units/selections):\n", | |
1538 sep = "", file = f_ile, append = TRUE) | |
1539 | |
1540 ## metric info : | |
1541 cat("\n Metrics:", metrique, | |
1542 "\n", file = f_ile, append = TRUE) | |
1543 | |
1544 ## Clustering factors : | |
1545 if (is.element(agreg_level, c("spCL_unitobs", "spCL_espece", "spSpecies", "spEspece", | |
1546 "spUnitobs", "spUnitobs(CL)"))) { | |
1547 type <- "spatialGraph" | |
1548 } | |
1549 | |
1550 cat(switch(type, | |
1551 "graph" = "\nGrouping factor(s): \n * ", | |
1552 "stat" = "\nAnalyses factor(s): \n * ", | |
1553 "spatialGraph" = "\nSpatial aggregation factor(s): \n * "), | |
1554 paste(list_fact, collaspe = "\n * "), "\n", file = f_ile, append = TRUE) | |
1555 | |
1556 } | |
1557 | |
1558 ######################################### end of the function print_selection_info_f | |
1559 | |
1560 | |
1561 ######################################### start of the function print_stats_f called by info_stats_f | |
1562 | |
1563 print_stats_f <- function(d_ata, metrique, list_fact, f_ile, headline = NULL) { | |
1564 ## Purpose: Write general statistics table | |
1565 ## ---------------------------------------------------------------------- | |
1566 ## Arguments: d_ata : Analysis data | |
1567 ## metrique : metric's name | |
1568 ## list_fact : Factor's list | |
1569 ## f_ile : Simple statistics file name | |
1570 ## ---------------------------------------------------------------------- | |
1571 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:09 modified by Coline ROYAUX 04 june 2020 | |
1572 | |
1573 ## Header : | |
1574 if (! is.null(headline)) { | |
1575 cat("\n", rep("#", nchar(headline) + 3), "\n", | |
1576 "## ", headline, "\n", | |
1577 sep = "", file = f_ile, append = TRUE) | |
1578 } | |
1579 | |
1580 cat("\n########################\nBase statistics:\n\n", file = f_ile, append = TRUE) | |
1581 | |
1582 capture.output(print(summary_fr(d_ata[, metrique])), file = f_ile, append = TRUE) | |
1583 | |
1584 if (! is.null(list_fact)) { | |
1585 cat("\n#########################################", | |
1586 "\nStatistics per combination of factor levels:\n\n", file = f_ile, sep = "", append = TRUE) | |
1587 | |
1588 ## Compute summary for each existing factor's cross : | |
1589 res <- with(d_ata, | |
1590 tapply(eval(parse(text = metrique)), | |
1591 INDEX = do.call(paste, | |
1592 c(lapply(list_fact, | |
1593 function(y)eval(parse(text = y))), | |
1594 sep = ".")), | |
1595 FUN = summary_fr)) | |
1596 | |
1597 ## results in table | |
1598 capture.output(print(do.call(rbind, res)), | |
1599 file = f_ile, append = TRUE) | |
1600 } | |
1601 | |
1602 ## empty line : | |
1603 cat("\n", file = f_ile, append = TRUE) | |
1604 } | |
1605 | |
1606 ######################################### end of the function print_stats_f | |
1607 | |
1608 | |
1609 ######################################### start of the function summary_fr called by print_stats_f | |
1610 summary_fr <- function(object, digits = max(3, getOption("digits") - 3), ...) { | |
1611 ## Purpose: Adding SD and N to summary | |
1612 ## ---------------------------------------------------------------------- | |
1613 ## Arguments: object : Object to summarise | |
1614 ## ---------------------------------------------------------------------- | |
1615 ## Author: Yves Reecht, Date: 13 sept. 2012, 15:47 modified by Coline ROYAUX 04 june 2020 | |
1616 | |
1617 if (! is.numeric(object)) stop("Programming error") | |
1618 | |
1619 ## Compute summary : | |
1620 res <- c(summary(object = object, digits, ...), "sd" = signif(sd(x = object), digits = digits), "N" = length(object)) | |
1621 | |
1622 return(res) | |
1623 } | |
1624 | |
1625 ######################################### start of the function summary_fr |