comparison FunctPAMPAGalaxy.r @ 3:8d8aec182fb1 draft

"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 04381ca7162ec3ec68419e308194b91d11cacb04"
author ecology
date Mon, 16 Nov 2020 11:02:09 +0000
parents c9dfe4e20a45
children 07b081730994
comparison
equal deleted inserted replaced
2:f1bfdeb5ebfe 3:8d8aec182fb1
9 #### Modified by Coline ROYAUX for integrating within Galaxy-E 9 #### Modified by Coline ROYAUX for integrating within Galaxy-E
10 10
11 ######################################### start of the function fact.def.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r 11 ######################################### start of the function fact.def.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
12 ####### Define the finest aggregation with the observation table 12 ####### Define the finest aggregation with the observation table
13 13
14 fact.det.f <- function (Obs, 14 fact_det_f <- function(obs,
15 size.class="size.class", 15 size_class = "size.class",
16 code.especes="species.code", 16 code_species = "species.code",
17 unitobs="observation.unit") 17 unitobs = "observation.unit") {
18 { 18 if (any(is.element(c(size_class), colnames(obs))) && all(! is.na(obs[, size_class]))) {
19 if (any(is.element(c(size.class), colnames(obs))) && all(! is.na(obs[, size.class]))) 19 factors <- c(unitobs, code_species, size_class)
20 {
21 factors <- c(unitobs, code.especes, size.class)
22 }else{ 20 }else{
23 factors <- c(unitobs, code.especes) 21 factors <- c(unitobs, code_species)
24 } 22 }
25 return(factors) 23 return(factors)
26 } 24 }
27 25
28 ######################################### end of the function fact.def.f 26 ######################################### end of the function fact.def.f
29 27
30 ######################################### start of the function def.typeobs.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r 28 ######################################### start of the function def_typeobs_f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
31 ####### Define observation type from colnames 29 ####### Define observation type from colnames
32 30
33 def.typeobs.f <- function(Obs) 31 def_typeobs_f <- function(obs) {
34 { 32 if (any(is.element(c("rotation", "rot", "rotate"), colnames(obs)))) {
35 if (any(is.element(c("rotation","rot","rotate"),colnames(obs)))) 33 obs_type <- "SVR"
36 { 34 }else{
37 ObsType <- "SVR" 35 obs_type <- "other"
38 }else{ 36 }
39 ObsType <- "other" 37 return(obs_type)
40 } 38 }
41 return(ObsType) 39 ######################################### end of the function fact.def.f
42 } 40
43 ######################################### end of the function fact.def.f 41 ######################################### start of the function create_unitobs called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
44
45 ######################################### start of the function create.unitobs called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
46 ####### Create unitobs column when inexistant 42 ####### Create unitobs column when inexistant
47 create.unitobs <- function(data,year="year",point="point", unitobs="observation.unit") 43 create_unitobs <- function(data, year = "year", location = "location", unitobs = "observation.unit") {
48 { 44 if (is.element(paste(unitobs), colnames(data))) {
49 if (is.element(paste(unitobs),colnames(data)) && all(grepl("[1-2][0|8|9][0-9]{2}_.*",data[,unitobs])==FALSE)) 45 unitab <- data
50 { 46 }else{
51 unitab <- data 47
52 48 unitab <- tidyr::unite(data, col = "observation.unit", c(year, location))
53 }else{
54
55 unitab <- unite(data,col="observation.unit",c(year,point))
56 } 49 }
57 return(unitab) 50 return(unitab)
58 } 51 }
59 ######################################### start of the function create.unitobs 52 ######################################### start of the function create_unitobs
60 53
61 ######################################### start of the function create.year.point called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r 54 ######################################### start of the function create_year_location called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
62 ####### separate unitobs column when existant 55 ####### separate unitobs column when existant
63 create.year.point <- function(data,year="year",point="point", unitobs="observation.unit") 56 create_year_location <- function(data, year = "year", location = "location", unitobs = "observation.unit") {
64 { 57 if (all(grepl("[1-2][0|8|9][0-9]{2}_.*", data[, unitobs])) == TRUE) {
65 if (all(grepl("[1-2][0|8|9][0-9]{2}_.*",data[,unitobs]))==TRUE) 58 tab <- tidyr::separate(data, col = unitobs, into = c(year, location), sep = "_")
66 { 59 }else{
67 tab <- separate(data,col=unitobs,into=c(year,point),sep="_") 60 if (all(grepl("[A-Z]{2}[0-9]{2}.*", data[, unitobs]) == TRUE)) {
68 }else{ 61 tab <- tidyr::separate(data, col = unitobs, into = c("site1", year, "obs"), sep = c(2, 4))
69 tab <- separate(data,col=unitobs,into=c("site1", year,"obs"),sep=c(2,4)) 62 tab <- tidyr::unite(tab, col = location, c("site1", "obs"))
70 tab <- unite(tab, col=point, c("site1","obs")) 63 }else{
71 64 tab <- data
72 } 65 }
73 66 }
74 tab <- cbind(tab,observation.unit = data[,unitobs]) 67
68 tab <- cbind(tab, observation.unit = data[, unitobs])
75 69
76 return(tab) 70 return(tab)
77 } 71 }
78 ######################################### start of the function create.unitobs 72 ######################################### start of the function create_year_location
79 73
80 ######################################### start of the function check_file called by every Galaxy Rscripts 74 ######################################### start of the function check_file called by every Galaxy Rscripts
81 75
82 check_file<-function(dataset,err_msg,vars,nb_vars){ 76 check_file <- function(dataset, err_msg, vars, nb_vars) {
83 77
84 ## Purpose: General function to check integrity of input file. Will 78 ## Purpose: General function to check integrity of input file. Will
85 ## check numbers and contents of variables(colnames). 79 ## check numbers and contents of variables(colnames).
86 ## return an error message and exit if mismatch detected 80 ## return an error message and exit if mismatch detected
87 ## ---------------------------------------------------------------------- 81 ## ----------------------------------------------------------------------
88 ## Arguments: dataset : dataset name 82 ## Arguments: dataset : dataset name
89 ## err_msg : output error 83 ## err_msg : output error
90 ## vars : expected name of variables 84 ## vars : expected name of variables
91 ## nb_vars : expected number of variables 85 ## nb_vars : expected number of variables
92 ## ---------------------------------------------------------------------- 86 ## ----------------------------------------------------------------------
93 ## Author: Alan Amosse, Benjamin Yguel 87 ## Author: Alan Amosse, Benjamin Yguel
94 88
95 if(ncol(dataset) < nb_vars){ #checking for right number of columns in the file if not = error message 89 if (ncol(dataset) < nb_vars) { #checking for right number of columns in the file if not = error message
96 cat("\nerr nb var\n") 90 cat("\nerr nb var\n")
97 stop(err_msg, call.=FALSE) 91 stop(err_msg, call. = FALSE)
98 } 92 }
99 93
100 for(i in vars){ 94 for (i in vars) {
101 if(!(i %in% names(dataset))){ #checking colnames 95 if (!(i %in% names(dataset))) { #checking colnames
102 stop(err_msg,call.=FALSE) 96 stop(err_msg, call. = FALSE)
103 } 97 }
104 } 98 }
105 } 99 }
106 100
107 ######################################### end of the function check_file 101 ######################################### end of the function check_file
108 102
109 103
110 ######################################### start of the function statRotationsNumber.f called by calc.numbers.f 104 ######################################### start of the function stat_rotations_nb_f called by calc_numbers_f
111 105
112 statRotationsNumber.f <- function(factors, obs) 106 stat_rotations_nb_f <- function(factors, obs) {
113 { 107 ## Purpose: Computing abundance statistics by rotation (max, sd)
114 ## Purpose: Computing abundance statistics by rotation (max, sd)
115 ## on SVR data 108 ## on SVR data
116 ## ---------------------------------------------------------------------- 109 ## ----------------------------------------------------------------------
117 ## Arguments: factors : Names of aggregation factors 110 ## Arguments: factors : Names of aggregation factors
118 ## obs : observation data 111 ## obs : observation data
119 ## ---------------------------------------------------------------------- 112 ## ----------------------------------------------------------------------
120 ## Author: Yves Reecht, Date: 29 oct. 2012, 16:01 modified by Coline ROYAUX 04 june 2020 113 ## Author: Yves Reecht, Date: 29 oct. 2012, 16:01 modified by Coline ROYAUX 04 june 2020
121 114
122 ## Identification of valid rotations : 115 ## Identification of valid rotations :
123 if (is.element("observation.unit", factors)) 116 if (is.element("observation.unit", factors)) {
124 {
125 ## valid rotations (empty must be there as well) : 117 ## valid rotations (empty must be there as well) :
126 rotations <- tapply(obs$rotation, 118 rotations <- tapply(obs$rotation,
127 as.list(obs[ , c("observation.unit", "rotation"), drop=FALSE]), 119 as.list(obs[, c("observation.unit", "rotation"), drop = FALSE]),
128 function(x)length(x) > 0) 120 function(x)length(x) > 0)
129 121
130 ## Changing NA rotations in FALSE : 122 ## Changing NA rotations in FALSE :
131 rotations[is.na(rotations)] <- FALSE 123 rotations[is.na(rotations)] <- FALSE
132 }else{
133 #stop(mltext("statRotations.err.1"))
134 } 124 }
135 125
136 ## ########################################################### 126 ## ###########################################################
137 ## Abundance per rotation at chosen aggregation factors : 127 ## Abundance per rotation at chosen aggregation factors :
138 nombresR <- tapply(obs$number, 128 nombres_rot <- tapply(obs$number,
139 as.list(obs[ , c(factors, "rotation"), drop=FALSE]), 129 as.list(obs[, c(factors, "rotation"), drop = FALSE]),
140 function(x,...){ifelse(all(is.na(x)), NA, sum(x,...))}, 130 function(x, ...) {
131 ifelse(all(is.na(x)), NA, sum(x, ...))
132 },
141 na.rm = TRUE) 133 na.rm = TRUE)
142 134
143 ## If valid rotation NA are considered 0 : 135 ## If valid rotation NA are considered 0 :
144 nombresR <- sweep(nombresR, 136 nombres_rot <- sweep(nombres_rot,
145 match(names(dimnames(rotations)), names(dimnames(nombresR)), nomatch=NULL), 137 match(names(dimnames(rotations)), names(dimnames(nombres_rot)), nomatch = NULL),
146 rotations, # Tableau des secteurs valides (booléens). 138 rotations, # Tableau des secteurs valides (booléens).
147 function(x, y) 139 function(x, y) {
148 { 140 x[is.na(x) & y] <- 0 # Lorsque NA et secteur valide => 0.
149 x[is.na(x) & y] <- 0 # Lorsque NA et secteur valide => 0. 141 return(x)
150 return(x) 142 })
151 })
152 143
153 ## ################################################## 144 ## ##################################################
154 ## Statistics : 145 ## Statistics :
155 146
156 ## Means : 147 ## Means :
157 nombresMean <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)), 148 nb_mean <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)),
158 function(x,...){ifelse(all(is.na(x)), NA, mean(x,...))}, na.rm=TRUE) 149 function(x, ...) {
150 ifelse(all(is.na(x)), NA, mean(x, ...))
151 }, na.rm = TRUE)
159 152
160 ## Maxima : 153 ## Maxima :
161 nombresMax <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)), 154 nb_max <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)),
162 function(x,...){ifelse(all(is.na(x)), NA, max(x,...))}, na.rm=TRUE) 155 function(x, ...) {
156 ifelse(all(is.na(x)), NA, max(x, ...))
157 }, na.rm = TRUE)
163 158
164 ## SD : 159 ## SD :
165 nombresSD <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)), 160 nb_sd <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)),
166 function(x,...){ifelse(all(is.na(x)), NA, sd(x,...))}, na.rm=TRUE) 161 function(x, ...) {
162 ifelse(all(is.na(x)), NA, sd(x, ...))
163 }, na.rm = TRUE)
167 164
168 ## Valid rotations count : 165 ## Valid rotations count :
169 nombresRotations <- apply(rotations, 1, sum, na.rm=TRUE) 166 nombres_rotations <- apply(rotations, 1, sum, na.rm = TRUE)
170 167
171 ## Results returned as list : 168 ## Results returned as list :
172 return(list(nombresMean=nombresMean, nombresMax=nombresMax, nombresSD=nombresSD, 169 return(list(nb_mean = nb_mean, nb_max = nb_max, nb_sd = nb_sd,
173 nombresRotations=nombresRotations, nombresTot=nombresR)) 170 nombres_rotations = nombres_rotations, nombresTot = nombres_rot))
174 } 171 }
175 172
176 ######################################### end of the function statRotationsNumber.f 173 ######################################### end of the function stat_rotations_nb_f
177 174
178 ######################################### start of the function calcNumber.default.f called by calc.numbers.f 175 ######################################### start of the function calc_nb_default_f called by calc_numbers_f
179 176
180 calcNumber.default.f <- function(obs, 177 calc_nb_default_f <- function(obs,
181 factors=c("observation.unit", "species.code", "size.class"), 178 factors = c("observation.unit", "species.code", "size.class"),
182 nbName="number") 179 nb_name = "number") {
183 { 180 ## Purpose : Compute abundances at finest aggregation
184 ## Purpose : Compute abundances at finest aggregation
185 ## --------------------------------------------------------------------- 181 ## ---------------------------------------------------------------------
186 ## Arguments: obs : observation table 182 ## Arguments: obs : observation table
187 ## factors : aggregation factors 183 ## factors : aggregation factors
188 ## nbName : name of abundance column. 184 ## nb_name : name of abundance column.
189 ## 185 ##
190 ## Output: array with ndimensions = nfactors. 186 ## Output: array with ndimensions = nfactors.
191 ## ---------------------------------------------------------------------- 187 ## ----------------------------------------------------------------------
192 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:38 modified by Coline ROYAUX 04 june 2020 188 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:38 modified by Coline ROYAUX 04 june 2020
193 189
194 ## Sum individuals number : 190 ## Sum individuals number :
195 nbr <- tapply(obs[ , nbName], 191 nbr <- tapply(obs[, nb_name],
196 as.list(obs[ , factors]), 192 as.list(obs[, factors]),
197 sum, na.rm = TRUE) 193 sum, na.rm = TRUE)
198 194
199 ## Absences as "true zero" : 195 ## Absences as "true zero" :
200 nbr[is.na(nbr)] <- 0 196 nbr[is.na(nbr)] <- 0
201 197
202 return(nbr) 198 return(nbr)
203 } 199 }
204 200
205 ######################################### end of the function calcNumber.default.f 201 ######################################### end of the function calc_nb_default_f
206 202
207 ######################################### start of the function calc.numbers.f 203 ######################################### start of the function calc_numbers_f
208 204
209 calc.numbers.f <- function(obs, ObsType="", factors=c("observation.unit", "species.code", "size.class"), nbName="number") 205 calc_numbers_f <- function(obs, obs_type = "", factors = c("observation.unit", "species.code", "size.class"), nb_name = "number") {
210 { 206 ## Purpose: Produce data.frame used as table from output of calc_nb_default_f().
211 ## Purpose: Produce data.frame used as table from output of calcNumber.default.f().
212 ## ---------------------------------------------------------------------- 207 ## ----------------------------------------------------------------------
213 ## Arguments: obs : observation table 208 ## Arguments: obs : observation table
214 ## ObsType : Type of observation (SVR, LIT, ...) 209 ## obs_type : Type of observation (SVR, LIT, ...)
215 ## factors : aggregation factors 210 ## factors : aggregation factors
216 ## nbName : name of abundance column 211 ## nb_name : name of abundance column
217 ## 212 ##
218 ## Output: data.frame with (N aggregation factors + 1) columns 213 ## Output: data.frame with (N aggregation factors + 1) columns
219 ## ---------------------------------------------------------------------- 214 ## ----------------------------------------------------------------------
220 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:46 modified by Coline ROYAUX 04 june 2020 215 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:46 modified by Coline ROYAUX 04 june 2020
221 216
222 if (ObsType == "SVR") 217 if (obs_type == "SVR") {
223 {
224 ## Compute SVR abundances statistics : 218 ## Compute SVR abundances statistics :
225 statRotations <- statRotationsNumber.f(factors=factors, 219 stat_rotations <- stat_rotations_nb_f(factors = factors,
226 obs=obs) 220 obs = obs)
227 221
228 ## Mean for rotating videos (3 rotations at most times) : 222 ## Mean for rotating videos (3 rotations at most times) :
229 nbr <- statRotations[["nombresMean"]] 223 nbr <- stat_rotations[["nb_mean"]]
230 224
231 }else{ 225 }else{
232 226
233 nbr <- calcNumber.default.f(obs, factors, nbName) 227 nbr <- calc_nb_default_f(obs, factors, nb_name)
234 } 228 }
235 229
236 res <- as.data.frame(as.table(nbr), responseName=nbName) 230 res <- as.data.frame(as.table(nbr), responseName = nb_name)
237 231
238 if (is.element("size.class", colnames(res))) 232 if (is.element("size.class", colnames(res))) {
239 {
240 res$size.class[res$size.class == ""] <- NA 233 res$size.class[res$size.class == ""] <- NA
241 }else{} 234 }
242 235
243 ## If integer abundances : 236 ## If integer abundances :
244 if (isTRUE(all.equal(res[ , nbName], as.integer(res[ , nbName])))) 237 if (isTRUE(all.equal(res[, nb_name], as.integer(res[, nb_name])))) {
245 { 238 res[, nb_name] <- as.integer(res[, nb_name])
246 res[ , nbName] <- as.integer(res[ , nbName]) 239 }
247 }else{} 240
248 241 if (obs_type == "SVR") {
249 if (ObsType == "SVR")
250 {
251 ## statistics on abundances : 242 ## statistics on abundances :
252 res$number.max <- as.vector(statRotations[["nombresMax"]]) 243 res[, "number.max"] <- as.vector(stat_rotations[["nb_max"]])
253 res$number.sd <- as.vector(statRotations[["nombresSD"]]) 244 res[, "number.sd"] <- as.vector(stat_rotations[["nb_sd"]])
254 245
255 }else{} 246 }
256 247
257 return(res) 248 return(res)
258 } 249 }
259 250
260 ######################################### end of the function calc.numbers.f 251 ######################################### end of the function calc_numbers_f
261 252
262 ######################################### start of the function presAbs.f called by calcBiodiv.f 253 ######################################### start of the function pres_abs_f called by calc_biodiv_f
263 254
264 presAbs.f <- function(nombres, logical=FALSE) 255 pres_abs_f <- function(nombres, logical = FALSE) {
265 {
266 ## Purpose: Compute presence absence from abundances 256 ## Purpose: Compute presence absence from abundances
267 ## ---------------------------------------------------------------------- 257 ## ----------------------------------------------------------------------
268 ## Arguments: nombres : vector of individuals count. 258 ## Arguments: nombres : vector of individuals count.
269 ## logical : (boolean) results as boolean or 0/1 ? 259 ## logical : (boolean) results as boolean or 0/1 ?
270 ## ---------------------------------------------------------------------- 260 ## ----------------------------------------------------------------------
271 ## Author: Yves Reecht, Date: 29 oct. 2010, 10:20 modified by Coline ROYAUX 04 june 2020 261 ## Author: Yves Reecht, Date: 29 oct. 2010, 10:20 modified by Coline ROYAUX 04 june 2020
272 262
273 if (any(nombres < 0, na.rm=TRUE)) 263 if (any(nombres < 0, na.rm = TRUE)) {
274 {
275 stop("Negative abundances!") 264 stop("Negative abundances!")
276 }else{} 265 }
277 266
278 if (logical) 267 if (logical) {
279 {
280 return(nombres > 0) 268 return(nombres > 0)
281 }else{ 269 }else{
282 nombres[nombres > 0] <- 1 270 nombres[nombres > 0] <- 1
283 return(nombres) 271 return(nombres)
284 } 272 }
285 } 273 }
286 274
287 ######################################### end of the function presAbs.f 275 ######################################### end of the function pres_abs_f
288 276
289 ######################################### start of the function betterCbind called by agregations.generic.f 277 ######################################### start of the function bettercbind called by agregations_generic_f
290 278
291 betterCbind <- function(..., dfList=NULL, deparse.level = 1) 279 bettercbind <- function(..., df_list = NULL, deparse.level = 1) {
292 {
293 ## Purpose: Apply cbind to data frame with mathcing columns but without 280 ## Purpose: Apply cbind to data frame with mathcing columns but without
294 ## redundancies. 281 ## redundancies.
295 ## ---------------------------------------------------------------------- 282 ## ----------------------------------------------------------------------
296 ## Arguments: same as cbind... 283 ## Arguments: same as cbind...
297 ## dfList : data.frames list 284 ## df_list : data.frames list
298 ## ---------------------------------------------------------------------- 285 ## ----------------------------------------------------------------------
299 ## Author: Yves Reecht, Date: 17 janv. 2012, 21:10 modified by Coline ROYAUX 04 june 2020 286 ## Author: Yves Reecht, Date: 17 janv. 2012, 21:10 modified by Coline ROYAUX 04 june 2020
300 287
301 if (is.null(dfList)) 288 if (is.null(df_list)) {
302 { 289 df_list <- list(...)
303 dfList <- list(...) 290 }
304 }else{}
305 291
306 return(do.call(cbind, 292 return(do.call(cbind,
307 c(list(dfList[[1]][ , c(tail(colnames(dfList[[1]]), -1), 293 c(list(df_list[[1]][, c(tail(colnames(df_list[[1]]), -1),
308 head(colnames(dfList[[1]]), 1))]), 294 head(colnames(df_list[[1]]), 1))]),
309 lapply(dfList[-1], 295 lapply(df_list[- 1],
310 function(x, colDel) 296 function(x, coldel) {
311 { 297 return(x[, !is.element(colnames(x),
312 return(x[ , !is.element(colnames(x), 298 coldel),
313 colDel), 299 drop = FALSE])
314 drop=FALSE])
315 }, 300 },
316 colDel=colnames(dfList[[1]])), 301 coldel = colnames(df_list[[1]])),
317 deparse.level=deparse.level))) 302 deparse.level = deparse.level)))
318 } 303 }
319 304
320 ######################################### end of the function betterCbind 305 ######################################### end of the function bettercbind
321 306
322 ######################################### start of the function agregation.f called by agregations.generic.f 307 ######################################### start of the function agregation_f called by agregations_generic_f
323 308
324 agregation.f <- function(metric, Data, factors, casMetrique, 309 agregation_f <- function(metric, d_ata, factors, cas_metric,
325 nbName="number") 310 nb_name = "number") {
326 {
327 ## Purpose: metric aggregation 311 ## Purpose: metric aggregation
328 ## ---------------------------------------------------------------------- 312 ## ----------------------------------------------------------------------
329 ## Arguments: metric: colnames of chosen metric 313 ## Arguments: metric: colnames of chosen metric
330 ## Data: Unaggregated data table 314 ## d_ata: Unaggregated data table
331 ## factors: aggregation factors vector 315 ## factors: aggregation factors vector
332 ## casMetrique: named vector of observation types depending 316 ## cas_metric: named vector of observation types depending
333 ## on chosen metric 317 ## on chosen metric
334 ## nbName : abundance column name 318 ## nb_name : abundance column name
335 ## ---------------------------------------------------------------------- 319 ## ----------------------------------------------------------------------
336 ## Author: Yves Reecht, Date: 20 déc. 2011, 14:29 modified by Coline ROYAUX 04 june 2020 320 ## Author: Yves Reecht, Date: 20 déc. 2011, 14:29 modified by Coline ROYAUX 04 june 2020
337 321
338 switch(casMetrique[metric], 322 switch(cas_metric[metric],
339 "sum"={ 323 "sum" = {
340 res <- tapply(Data[ , metric], 324 res <- tapply(d_ata[, metric],
341 as.list(Data[ , factors, drop=FALSE]), 325 as.list(d_ata[, factors, drop = FALSE]),
342 function(x) 326 function(x) {
343 {
344 ifelse(all(is.na(x)), 327 ifelse(all(is.na(x)),
345 NA, 328 NA,
346 sum(x, na.rm=TRUE)) 329 sum(x, na.rm = TRUE))
347 }) 330 })
348 }, 331 },
349 "w.mean"={ 332 "w.mean" = {
350 res <- tapply(1:nrow(Data), 333 res <- tapply(seq_len(nrow(d_ata)),
351 as.list(Data[ , factors, drop=FALSE]), 334 as.list(d_ata[, factors, drop = FALSE]),
352 function(ii) 335 function(ii) {
353 { 336 ifelse(all(is.na(d_ata[ii, metric])),
354 ifelse(all(is.na(Data[ii, metric])),
355 NA, 337 NA,
356 weighted.mean(Data[ii, metric], 338 weighted.mean(d_ata[ii, metric],
357 Data[ii, nbName], 339 d_ata[ii, nb_name],
358 na.rm=TRUE)) 340 na.rm = TRUE))
359 }) 341 })
360 }, 342 },
361 "w.mean.colonies"={ 343 "w.mean.colonies" = {
362 res <- tapply(1:nrow(Data), 344 res <- tapply(seq_len(nrow(d_ata)),
363 as.list(Data[ , factors, drop=FALSE]), 345 as.list(d_ata[, factors, drop = FALSE]),
364 function(ii) 346 function(ii) {
365 { 347 ifelse(all(is.na(d_ata[ii, metric])),
366 ifelse(all(is.na(Data[ii, metric])),
367 NA, 348 NA,
368 weighted.mean(Data[ii, metric], 349 weighted.mean(d_ata[ii, metric],
369 Data[ii, "colonies"], 350 d_ata[ii, "colonies"],
370 na.rm=TRUE)) 351 na.rm = TRUE))
371 }) 352 })
372 }, 353 },
373 "w.mean.prop"={ 354 "w.mean.prop" = {
374 res <- tapply(1:nrow(Data), 355 res <- tapply(seq_len(nrow(d_ata)),
375 as.list(Data[ , factors, drop=FALSE]), 356 as.list(d_ata[, factors, drop = FALSE]),
376 function(ii) 357 function(ii) {
377 { 358 ifelse(all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, "nombre.tot"], na.rm = TRUE) == 0,
378 ifelse(all(is.na(Data[ii, metric])) || sum(Data[ii, "nombre.tot"], na.rm=TRUE) == 0,
379 NA, 359 NA,
380 ifelse(all(na.omit(Data[ii, metric]) == 0), # Pour ne pas avoir NaN. 360 ifelse(all(na.omit(d_ata[ii, metric]) == 0),
381 0, 361 0,
382 (sum(Data[ii, nbName][ !is.na(Data[ii, metric])], na.rm=TRUE) / 362 (sum(d_ata[ii, nb_name][!is.na(d_ata[ii, metric])], na.rm = TRUE) /
383 sum(Data[ii, "nombre.tot"], na.rm=TRUE)) * 363 sum(d_ata[ii, "nombre.tot"], na.rm = TRUE)) *
384 ## Correction if size class isn't an aggregation factor 364 ## Correction if size class isn't an aggregation factor
385 ## (otherwise value divided by number of present classes) : 365 ## (otherwise value divided by number of present classes) :
386 ifelse(is.element("size.class", factors), 366 ifelse(is.element("size.class", factors),
387 100, 367 100,
388 100 * length(unique(Data$size.class))))) 368 100 * length(unique(d_ata$size.class)))))
389 }) 369 })
390 370
391 }, 371 },
392 "w.mean.prop.bio"={ 372 "w.mean.prop.bio" = {
393 res <- tapply(1:nrow(Data), 373 res <- tapply(seq_len(nrow(d_ata)),
394 as.list(Data[ , factors, drop=FALSE]), 374 as.list(d_ata[, factors, drop = FALSE]),
395 function(ii) 375 function(ii) {
396 { 376 ifelse(all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, "tot.biomass"], na.rm = TRUE) == 0,
397 ifelse(all(is.na(Data[ii, metric])) || sum(Data[ii, "tot.biomass"], na.rm=TRUE) == 0,
398 NA, 377 NA,
399 ifelse(all(na.omit(Data[ii, metric]) == 0), # Pour ne pas avoir NaN. 378 ifelse(all(na.omit(d_ata[ii, metric]) == 0),
400 0, 379 0,
401 (sum(Data[ii, "biomass"][ !is.na(Data[ii, metric])], na.rm=TRUE) / 380 (sum(d_ata[ii, "biomass"][!is.na(d_ata[ii, metric])], na.rm = TRUE) /
402 sum(Data[ii, "tot.biomass"], na.rm=TRUE)) * 381 sum(d_ata[ii, "tot.biomass"], na.rm = TRUE)) *
403 ## Correction if size class isn't an aggregation factor 382 ## Correction if size class isn't an aggregation factor
404 ## (otherwise value divided by number of present classes) : 383 ## (otherwise value divided by number of present classes) :
405 ifelse(is.element("size.class", factors), 384 ifelse(is.element("size.class", factors),
406 100, 385 100,
407 100 * length(unique(Data$size.class))))) 386 100 * length(unique(d_ata$size.class)))))
408 }) 387 })
409 388
410 }, 389 },
411 "pres"={ 390 "pres" = {
412 res <- tapply(Data[ , metric], 391 res <- tapply(d_ata[, metric],
413 as.list(Data[ , factors, drop=FALSE]), 392 as.list(d_ata[, factors, drop = FALSE]),
414 function(x) 393 function(x) {
415 {
416 ifelse(all(is.na(x)), # When only NAs. 394 ifelse(all(is.na(x)), # When only NAs.
417 NA, 395 NA,
418 ifelse(any(x > 0, na.rm=TRUE), # Otherwise... 396 ifelse(any(x > 0, na.rm = TRUE), # Otherwise...
419 1, # ... presence if at least one observation in the group. 397 1, # ... presence if at least one observation in the group.
420 0)) 398 0))
421 }) 399 })
422 }, 400 },
423 "nbMax"={ 401 "nbMax" = {
424 ## Recuperation of raw abundances with selections :
425 nbTmp <- getReducedSVRdata.f(dataName=".NombresSVR", data=Data)
426 402
427 ## Sum by factor cross / rotation : 403 ## Sum by factor cross / rotation :
428 nbTmp2 <- apply(nbTmp, 404 nb_tmp2 <- apply(nb_tmp,
429 which(is.element(names(dimnames(nbTmp)), c(factors, "rotation"))), 405 which(is.element(names(dimnames(nb_tmp)), c(factors, "rotation"))),
430 function(x) 406 function(x) {
431 { 407 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE))
432 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))
433 }) 408 })
434 409
435 ## Sum by factor cross : 410 ## Sum by factor cross :
436 res <- as.array(apply(nbTmp2, 411 res <- as.array(apply(nb_tmp2,
437 which(is.element(names(dimnames(nbTmp)), factors)), 412 which(is.element(names(dimnames(nb_tmp)), factors)),
438 function(x) 413 function(x) {
439 { 414 ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE))
440 ifelse(all(is.na(x)), NA, max(x, na.rm=TRUE))
441 })) 415 }))
442 }, 416 },
443 "nbSD"={ 417 "nbSD" = {
444 ## Recuperation of raw abundances with selections :
445 nbTmp <- getReducedSVRdata.f(dataName=".NombresSVR", data=Data)
446 418
447 ## Sum by factor cross / rotation : 419 ## Sum by factor cross / rotation :
448 nbTmp2 <- apply(nbTmp, 420 nb_tmp2 <- apply(nb_tmp,
449 which(is.element(names(dimnames(nbTmp)), c(factors, "rotation"))), 421 which(is.element(names(dimnames(nb_tmp)), c(factors, "rotation"))),
450 function(x) 422 function(x) {
451 { 423 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE))
452 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))
453 }) 424 })
454 425
455 ## Sum by factor cross : 426 ## Sum by factor cross :
456 res <- as.array(apply(nbTmp2, 427 res <- as.array(apply(nb_tmp2,
457 which(is.element(names(dimnames(nbTmp)), factors)), 428 which(is.element(names(dimnames(nb_tmp)), factors)),
458 function(x) 429 function(x) {
459 { 430 ifelse(all(is.na(x)), NA, sd(x, na.rm = TRUE))
460 ifelse(all(is.na(x)), NA, sd(x, na.rm=TRUE))
461 })) 431 }))
462 }, 432 },
463 "densMax"={ 433 "densMax" = {
464 ## Recuperation of raw abundances with selections :
465 densTmp <- getReducedSVRdata.f(dataName=".DensitesSVR", data=Data)
466 434
467 ## Sum by factor cross / rotation : 435 ## Sum by factor cross / rotation :
468 densTmp2 <- apply(densTmp, 436 dens_tmp2 <- apply(dens_tmp,
469 which(is.element(names(dimnames(densTmp)), c(factors, "rotation"))), 437 which(is.element(names(dimnames(dens_tmp)), c(factors, "rotation"))),
470 function(x) 438 function(x) {
471 { 439 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE))
472 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))
473 }) 440 })
474 441
475 ## Sum by factor cross : 442 ## Sum by factor cross :
476 res <- as.array(apply(densTmp2, 443 res <- as.array(apply(dens_tmp2,
477 which(is.element(names(dimnames(densTmp)), factors)), 444 which(is.element(names(dimnames(dens_tmp)), factors)),
478 function(x) 445 function(x) {
479 { 446 ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE))
480 ifelse(all(is.na(x)), NA, max(x, na.rm=TRUE))
481 })) 447 }))
482 }, 448 },
483 "densSD"={ 449 "densSD" = {
484 ## Recuperation of raw abundances with selections :
485 densTmp <- getReducedSVRdata.f(dataName=".DensitesSVR", data=Data)
486 450
487 ## Sum by factor cross / rotation : 451 ## Sum by factor cross / rotation :
488 densTmp2 <- apply(densTmp, 452 dens_tmp2 <- apply(dens_tmp,
489 which(is.element(names(dimnames(densTmp)), c(factors, "rotation"))), 453 which(is.element(names(dimnames(dens_tmp)), c(factors, "rotation"))),
490 function(x) 454 function(x) {
491 { 455 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE))
492 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))
493 }) 456 })
494 457
495 ## Sum by factor cross : 458 ## Sum by factor cross :
496 res <- as.array(apply(densTmp2, 459 res <- as.array(apply(dens_tmp2,
497 which(is.element(names(dimnames(densTmp)), factors)), 460 which(is.element(names(dimnames(dens_tmp)), factors)),
498 function(x) 461 function(x) {
499 { 462 ifelse(all(is.na(x)), NA, sd(x, na.rm = TRUE))
500 ifelse(all(is.na(x)), NA, sd(x, na.rm=TRUE))
501 })) 463 }))
502 }, 464 },
503 "%.nesting"={ 465 "%.nesting" = {
504 res <- tapply(1:nrow(Data), 466 res <- tapply(seq_len(nrow(d_ata)),
505 as.list(Data[ , factors, drop=FALSE]), 467 as.list(d_ata[, factors, drop = FALSE]),
506 function(ii) 468 function(ii) {
507 { 469 ifelse(all(is.na(d_ata[ii, metric])),
508 ifelse(all(is.na(Data[ii, metric])),
509 NA, 470 NA,
510 weighted.mean(Data[ii, metric], 471 weighted.mean(d_ata[ii, metric],
511 Data[ii, "readable.tracks"], 472 d_ata[ii, "readable.tracks"],
512 na.rm=TRUE)) 473 na.rm = TRUE))
513 }) 474 })
514 }, 475 },
515 stop("Not implemented!") 476 stop("Not implemented!")
516 ) 477 )
517 478
518 ## dimension names 479 ## dimension names
519 names(dimnames(res)) <- c(factors) 480 names(dimnames(res)) <- c(factors)
520 481
521 ## Transformation to long format : 482 ## Transformation to long format :
522 reslong <- as.data.frame(as.table(res), responseName=metric) 483 reslong <- as.data.frame(as.table(res), responseName = metric)
523 reslong <- reslong[ , c(tail(colnames(reslong), 1), head(colnames(reslong), -1))] # metric first 484 reslong <- reslong[, c(tail(colnames(reslong), 1), head(colnames(reslong), -1))] # metric first
524 485
525 return(reslong) 486 return(reslong)
526 } 487 }
527 488
528 ######################################### end of the function agregation.f 489 ######################################### end of the function agregation_f
529 490
530 ######################################### start of the function agregations.generic.f called y calcBiodiv.f in FucntExeCalcCommIndexesGalaxy.r 491 ######################################### start of the function agregations_generic_f called y calc_biodiv_f in FucntExeCalcCommIndexesGalaxy.r
531 492
532 agregations.generic.f <- function(Data, metrics, factors, listFact=NULL, unitSpSz=NULL, unitSp=NULL, 493 agregations_generic_f <- function(d_ata, metrics, factors, list_fact = NULL, unit_sp_sz = NULL, unit_sp = NULL,
533 nbName="number") 494 nb_name = "number") {
534 { 495 ## Purpose: Aggregate data
535 ## Purpose: Aggregate data 496 ## ----------------------------------------------------------------------
536 ## ---------------------------------------------------------------------- 497 ## Arguments: d_ata : data set
537 ## Arguments: Data : data set
538 ## metrics : aggregated metric 498 ## metrics : aggregated metric
539 ## factors : aggregation factors 499 ## factors : aggregation factors
540 ## listFact : other factors to aggregate and add to output 500 ## list_fact : other factors to aggregate and add to output
541 ## unitSpSz : Metrics table by unitobs/species/Size Class 501 ## unit_sp_sz : Metrics table by unitobs/species/Size Class
542 ## unitSp : Metrics table by unitobs/species 502 ## unit_sp : Metrics table by unitobs/species
543 ## nbName : abundance colname 503 ## nb_name : abundance colname
544 ## 504 ##
545 ## Output : aggregated data frame 505 ## Output : aggregated data frame
546 ## ---------------------------------------------------------------------- 506 ## ----------------------------------------------------------------------
547 ## Author: Yves Reecht, Date: 18 oct. 2010, 15:47 modified by Coline ROYAUX 04 june 2020 507 ## Author: Yves Reecht, Date: 18 oct. 2010, 15:47 modified by Coline ROYAUX 04 june 2020
548 508
549 ## trt depending on metric type : 509 ## trt depending on metric type :
550 casMetrique <- c("number"="sum", 510 cas_metric <- c("number" = "sum",
551 "mean.length"="w.mean", 511 "mean.length" = "w.mean",
552 "taille_moy"="w.mean", 512 "taille_moy" = "w.mean",
553 "biomass"="sum", 513 "biomass" = "sum",
554 "Biomass"="sum", 514 "Biomass" = "sum",
555 "weight"="sum", 515 "weight" = "sum",
556 "mean.weight"="w.mean", 516 "mean.weight" = "w.mean",
557 "density"="sum", 517 "density" = "sum",
558 "Density"="sum", 518 "Density" = "sum",
559 "CPUE"="sum", 519 "CPUE" = "sum",
560 "CPUE.biomass"="sum", 520 "CPUE.biomass" = "sum",
561 "pres.abs"="pres", 521 "presence_absence" = "pres",
562 "abundance.prop.SC"="w.mean.prop", # Not OK [!!!] ? 522 "abundance.prop.SC" = "w.mean.prop", # Not OK [!!!] ?
563 "biomass.prop.SC"="w.mean.prop.bio", # Not OK [!!!] ? 523 "biomass.prop.SC" = "w.mean.prop.bio", # Not OK [!!!] ?
564 ## Benthos : 524 ## Benthos :
565 "colonies"="sum", 525 "colonies" = "sum",
566 "coverage"="sum", 526 "coverage" = "sum",
567 "mean.size.colonies"="w.mean.colonies", 527 "mean.size.colonies" = "w.mean.colonies",
568 ## SVR (expérimental) : 528 ## SVR (expérimental) :
569 "number.max"="nbMax", 529 "number.max" = "nbMax",
570 "number.sd"="nbSD", 530 "number.sd" = "nbSD",
571 "density.max"="densMax", 531 "density.max" = "densMax",
572 "density.sd"="densSD", 532 "density.sd" = "densSD",
573 "biomass.max"="sum", 533 "biomass.max" = "sum",
574 "spawning.success"="%.nesting", 534 "spawning.success" = "%.nesting",
575 "spawnings"="sum", 535 "spawnings" = "sum",
576 "readable.tracks"="sum", 536 "readable.tracks" = "sum",
577 "tracks.number"="sum") 537 "tracks.number" = "sum")
578 538
579 ## add "readable.tracks" for egg laying percentage : 539 ## add "readable.tracks" for egg laying percentage :
580 if (any(casMetrique[metrics] == "%.nesting")) 540 if (any(cas_metric[metrics] == "%.nesting")) {
581 { 541 if (is.element("size.class", colnames(d_ata))) {
582 if (is.element("size.class", colnames(Data))) 542 if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini")
583 { 543
584 if (is.null(unitSpSz)) stop("unitSpSz doit être défini") 544 d_ata <- merge(d_ata,
585 545 unit_sp_sz[, c("species.code", "observation.unit", "size.class", "readable.tracks")],
586 Data <- merge(Data, 546 by = c("species.code", "observation.unit", "size.class"),
587 unitSpSz[ , c("species.code", "observation.unit", "size.class", "readable.tracks")], 547 suffixes = c("", ".y"))
588 by=c("species.code", "observation.unit", "size.class"),
589 suffixes=c("", ".y"))
590 }else{ 548 }else{
591 if (is.null(unitSp)) stop("unitSp must be defined") 549 if (is.null(unit_sp)) stop("unit_sp must be defined")
592 550
593 Data <- merge(Data, 551 d_ata <- merge(d_ata,
594 unitSp[ , c("species.code", "observation.unit", "readable.tracks")], 552 unit_sp[, c("species.code", "observation.unit", "readable.tracks")],
595 by=c("species.code", "observation.unit"), 553 by = c("species.code", "observation.unit"),
596 suffixes=c("", ".y")) 554 suffixes = c("", ".y"))
597 } 555 }
598 }else{} 556 }
599 557
600 ## Add "number" field for computing ponderate means if absent : 558 ## Add "number" field for computing ponderate means if absent :
601 if (any(casMetrique[metrics] == "w.mean" | casMetrique[metrics] == "w.mean.prop")) 559 if (any(cas_metric[metrics] == "w.mean" | cas_metric[metrics] == "w.mean.prop")) {
602 { 560 if (is.element("size.class", colnames(d_ata))) {
603 if (is.element("size.class", colnames(Data))) 561 if (is.null(unit_sp_sz)) stop("unit_sp_sz must be defined")
604 { 562
605 if (is.null(unitSpSz)) stop("unitSpSz must be defined") 563 d_ata <- merge(d_ata,
606 564 unit_sp_sz[, c("species.code", "observation.unit", "size.class", nb_name)],
607 Data <- merge(Data, 565 by = c("species.code", "observation.unit", "size.class"),
608 unitSpSz[ , c("species.code", "observation.unit", "size.class", nbName)], 566 suffixes = c("", ".y"))
609 by=c("species.code", "observation.unit", "size.class"),
610 suffixes=c("", ".y"))
611 567
612 ## add tot abundance / species / observation unit : 568 ## add tot abundance / species / observation unit :
613 nbTot <- tapply(unitSpSz[ , nbName], 569 nb_tot <- tapply(unit_sp_sz[, nb_name],
614 as.list(unitSpSz[ , c("species.code", "observation.unit")]), 570 as.list(unit_sp_sz[, c("species.code", "observation.unit")]),
615 sum, na.rm=TRUE) 571 sum, na.rm = TRUE)
616 572
617 Data <- merge(Data, 573 d_ata <- merge(d_ata,
618 as.data.frame(as.table(nbTot), responseName="nombre.tot")) 574 as.data.frame(as.table(nb_tot), responseName = "nombre.tot"))
619 }else{ 575 }else{
620 if (is.null(unitSp)) stop("unitSp must be defined") 576 if (is.null(unit_sp)) stop("unit_sp must be defined")
621 577
622 Data <- merge(Data, 578 d_ata <- merge(d_ata,
623 unitSp[ , c("species.code", "observation.unit", nbName)], # [!!!] unitSpSz ? 579 unit_sp[, c("species.code", "observation.unit", nb_name)], # [!!!] unit_sp_sz ?
624 by=c("species.code", "observation.unit"), 580 by = c("species.code", "observation.unit"),
625 suffixes=c("", ".y")) 581 suffixes = c("", ".y"))
626 } 582 }
627 }else{} 583 }
628 584
629 ## Add biomass field of biomass proportion by size class : 585 ## Add biomass field of biomass proportion by size class :
630 if (any(casMetrique[metrics] == "w.mean.prop.bio")) 586 if (any(cas_metric[metrics] == "w.mean.prop.bio")) {
631 { 587 if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini")
632 if (is.null(unitSpSz)) stop("unitSpSz doit être défini") 588
633 589 d_ata <- merge(d_ata,
634 Data <- merge(Data, 590 unit_sp_sz[, c("species.code", "observation.unit", "size.class", "biomass")],
635 unitSpSz[ , c("species.code", "observation.unit", "size.class", "biomass")], 591 by = c("species.code", "observation.unit", "size.class"),
636 by=c("species.code", "observation.unit", "size.class"), 592 suffixes = c("", ".y"))
637 suffixes=c("", ".y"))
638 593
639 ## add tot biomass / species / observation unit : 594 ## add tot biomass / species / observation unit :
640 biomTot <- tapply(unitSpSz$biomass, 595 biom_tot <- tapply(unit_sp_sz$biomass,
641 as.list(unitSpSz[ , c("species.code", "observation.unit")]), 596 as.list(unit_sp_sz[, c("species.code", "observation.unit")]),
642 function(x) 597 function(x) {
643 {
644 ifelse(all(is.na(x)), 598 ifelse(all(is.na(x)),
645 NA, 599 NA,
646 sum(x, na.rm=TRUE)) 600 sum(x, na.rm = TRUE))
647 }) 601 })
648 602
649 Data <- merge(Data, 603 d_ata <- merge(d_ata,
650 as.data.frame(as.table(biomTot), responseName="tot.biomass")) 604 as.data.frame(as.table(biom_tot), responseName = "tot.biomass"))
651 } 605 }
652 606
653 ## add colony field for ponderate means pondérées if absent : 607 ## add colony field for ponderate means pondérées if absent :
654 if (any(casMetrique[metrics] == "w.mean.colonies" & ! is.element("colonies", colnames(Data)))) 608 if (any(cas_metric[metrics] == "w.mean.colonies" & ! is.element("colonies", colnames(d_ata)))) {
655 { 609 d_ata$colonies <- unit_sp[match(apply(d_ata[, c("species.code", "observation.unit")],
656 Data$colonies <- unitSp[match(apply(Data[ , c("species.code", "observation.unit")], 610 1, paste, collapse = "*"),
657 1, paste, collapse="*"), 611 apply(unit_sp[, c("species.code", "observation.unit")],
658 apply(unitSp[ , c("species.code", "observation.unit")], 612 1, paste, collapse = "*")), "colonies"]
659 1, paste, collapse="*")), "colonies"] 613 }
660 }else{}
661 614
662 615
663 ## Aggregation of metric depending on factors : 616 ## Aggregation of metric depending on factors :
664 reslong <- betterCbind(dfList=lapply(metrics, # sapply used to have names 617 reslong <- bettercbind(df_list = lapply(metrics, # sapply used to have names
665 agregation.f, 618 agregation_f,
666 Data=Data, factors=factors, casMetrique=casMetrique, 619 d_ata = d_ata, factors = factors, cas_metric = cas_metric,
667 nbName=nbName)) 620 nb_name = nb_name))
668 621
669 ## Aggregation and add other factors : 622 ## Aggregation and add other factors :
670 if ( ! (is.null(listFact) || length(listFact) == 0)) 623 if (! (is.null(list_fact) || length(list_fact) == 0)) {
671 {
672 reslong <- cbind(reslong, 624 reslong <- cbind(reslong,
673 sapply(Data[ , listFact, drop=FALSE], 625 sapply(d_ata[, list_fact, drop = FALSE],
674 function(fact) 626 function(fact) {
675 {
676 tapply(fact, 627 tapply(fact,
677 as.list(Data[ , factors, drop=FALSE]), 628 as.list(d_ata[, factors, drop = FALSE]),
678 function(x) 629 function(x) {
679 { 630 if (length(x) > 1 && length(unique(x)) > 1) { # must be one modality
680 if (length(x) > 1 && length(unique(x)) > 1) # must be one modality
681 {
682 return(NULL) # otherwise it is NULL 631 return(NULL) # otherwise it is NULL
683 }else{ 632 }else{
684 unique(as.character(x)) 633 unique(as.character(x))
685 } 634 }
686 }) 635 })
687 })) 636 }))
688 }else{} 637 }
689 638
690 ## If some factors aren't at the right class : 639 ## If some factors aren't at the right class :
691 if (any(tmp <- sapply(reslong[ , listFact, drop=FALSE], class) != sapply(Data[ , listFact, drop=FALSE], class))) 640 if (any(tmp <- sapply(reslong[, list_fact, drop = FALSE], class) != sapply(d_ata[, list_fact, drop = FALSE], class))) {
692 { 641 for (i in which(tmp)) {
693 for (i in which(tmp)) 642 switch(sapply(d_ata[, list_fact, drop = FALSE], class)[i],
694 { 643 "integer" = {
695 switch(sapply(Data[ , listFact, drop=FALSE], class)[i], 644 reslong[, list_fact[i]] <- as.integer(as.character(reslong[, list_fact[i]]))
696 "integer"={
697 reslong[ , listFact[i]] <- as.integer(as.character(reslong[ , listFact[i]]))
698 }, 645 },
699 "numeric"={ 646 "numeric" = {
700 reslong[ , listFact[i]] <- as.numeric(as.character(reslong[ , listFact[i]])) 647 reslong[, list_fact[i]] <- as.numeric(as.character(reslong[, list_fact[i]]))
701 }, 648 },
702 reslong[ , listFact[i]] <- eval(call(paste("as", sapply(Data[ , listFact, drop=FALSE], class)[i], sep="."), 649 reslong[, list_fact[i]] <- eval(call(paste("as", sapply(d_ata[, list_fact, drop = FALSE], class)[i], sep = "."),
703 reslong[ , listFact[i]])) 650 reslong[, list_fact[i]]))
704 ) 651 )
705 } 652 }
706 }else{} 653 }
707 654
708 ## Initial order of factors levels : 655 ## Initial order of factors levels :
709 reslong <- as.data.frame(sapply(colnames(reslong), 656 reslong <- as.data.frame(sapply(colnames(reslong),
710 function(x) 657 function(x) {
711 { 658 if (is.factor(reslong[, x])) {
712 if (is.factor(reslong[ , x])) 659 return(factor(reslong[, x], levels = levels(d_ata[, x])))
713 {
714 return(factor(reslong[ , x], levels=levels(Data[ , x])))
715 }else{ 660 }else{
716 return(reslong[ , x]) 661 return(reslong[, x])
717 } 662 }
718 }, simplify=FALSE)) 663 }, simplify = FALSE))
719 664
720 665
721 ## Check of other aggregated factors supplémentaires. There must be no NULL elements : 666 ## Check of other aggregated factors supplémentaires. There must be no NULL elements :
722 if (any(sapply(reslong[ , listFact], function(x){any(is.null(unlist(x)))}))) 667 if (any(sapply(reslong[, list_fact], function(x) {
723 { 668 any(is.null(unlist(x)))
669 }))) {
724 warning(paste("One of the suppl. factors is probably a subset", 670 warning(paste("One of the suppl. factors is probably a subset",
725 " of the observations grouping factor(s).", sep="")) 671 " of the observations grouping factor(s).", sep = ""))
726 return(NULL) 672 return(NULL)
727 }else{ 673 }else{
728 return(reslong) 674 return(reslong)
729 } 675 }
730 } 676 }
731 677
732 ######################################### end of the function agregations.generic.f 678 ######################################### end of the function agregations_generic_f
733 679
734 ######################################### start of the function dropLevels.f called y calcBiodiv.f in FucntExeCalcCommIndexesGalaxy.r and modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r 680 ######################################### start of the function drop_levels_f called y calc_biodiv_f in FucntExeCalcCommIndexesGalaxy.r and glm_community in FunctExeCalcGLMGalaxy.r
735 dropLevels.f <- function(df, which=NULL) 681 drop_levels_f <- function(df, which = NULL) {
736 {
737 ## Purpose: Suppress unused levels of factors 682 ## Purpose: Suppress unused levels of factors
738 ## ---------------------------------------------------------------------- 683 ## ----------------------------------------------------------------------
739 ## Arguments: df : a data.frame 684 ## Arguments: df : a data.frame
740 ## which : included columns index (all by default) 685 ## which : included columns index (all by default)
741 ## ---------------------------------------------------------------------- 686 ## ----------------------------------------------------------------------
742 ## Author: Yves Reecht, Date: 10 août 2010, 13:29 modified by Coline ROYAUX 04 june 2020 687 ## Author: Yves Reecht, Date: 10 août 2010, 13:29 modified by Coline ROYAUX 04 june 2020
743 688
744 if (class(df) != "data.frame") 689 if (class(df) != "data.frame") {
745 {
746 stop("'df' must be a data.frame") 690 stop("'df' must be a data.frame")
747 }else{ 691 }else{
748 if (is.null(which)) 692 if (is.null(which)) {
749 { 693 x <- as.data.frame(sapply(df, function(x) {
750 x <- as.data.frame(sapply(df, function(x) 694 return(x[, drop = TRUE])
751 { 695 }, simplify = FALSE),
752 return(x[ ,drop=TRUE]) 696 stringsAsFactors = FALSE)
753 }, simplify=FALSE),
754 stringsAsFactors=FALSE)
755 }else{ # Only some columns used 697 }else{ # Only some columns used
756 x <- df 698 x <- df
757 699
758 x[ , which] <- as.data.frame(sapply(df[ , which, drop=FALSE], 700 x[, which] <- as.data.frame(sapply(df[, which, drop = FALSE],
759 function(x) 701 function(x) {
760 { 702 return(x[, drop = TRUE])
761 return(x[ , drop=TRUE]) 703 }, simplify = FALSE),
762 }, simplify=FALSE), 704 stringsAsFactors = FALSE)
763 stringsAsFactors=FALSE)
764 } 705 }
765 706
766 return(x) 707 return(x)
767 } 708 }
768 } 709 }
769 ######################################### end of the function dropLevels.f 710 ######################################### end of the function drop_levels_f
770 711
771 ######################################### start of the function subsetToutesTables.f called by modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r 712 ######################################### start of the function subset_all_tables_f called by glm_community in FunctExeCalcGLMGalaxy.r
772 713
773 subsetToutesTables.f <- function(metrique, tabMetrics, facteurs, selections, 714 subset_all_tables_f <- function(metrique, tab_metrics, facteurs, selections,
774 tabUnitobs, refesp, tableMetrique="", nbName="number", ObsType = "", 715 tab_unitobs, refesp, tab_metrique = "", nb_name = "number", obs_type = "",
775 exclude=NULL, add=c("species.code", "observation.unit")) 716 exclude = NULL, add = c("species.code", "observation.unit")) {
776 {
777 ## Purpose: Extract useful data only from chosen metrics and factors 717 ## Purpose: Extract useful data only from chosen metrics and factors
778 ## ---------------------------------------------------------------------- 718 ## ----------------------------------------------------------------------
779 ## Arguments: metrique : chosen metric 719 ## Arguments: metrique : chosen metric
780 ## facteurs : all chosen factors 720 ## facteurs : all chosen factors
781 ## selections : corresponding modality selected 721 ## selections : corresponding modality selected
782 ## tableMetrique : metrics table name 722 ## tab_metrique : metrics table name
783 ## exclude : factors levels to exclude 723 ## exclude : factors levels to exclude
784 ## add : field to add to data table 724 ## add : field to add to data table
785 ## ---------------------------------------------------------------------- 725 ## ----------------------------------------------------------------------
786 ## Author: Yves Reecht, Date: 6 août 2010, 16:46 modified by Coline ROYAUX 04 june 2020 726 ## Author: Yves Reecht, Date: 6 août 2010, 16:46 modified by Coline ROYAUX 04 june 2020
787 727
788 ## If no metrics table available : 728 ## If no metrics table available :
789 if (is.element(tableMetrique, c("", "TableOccurrences", "TablePresAbs"))) 729 if (is.element(tab_metrique, c("", "TableOccurrences", "TablePresAbs"))) {
790 { 730 tab_metrique <- "unit_sp"
791 tableMetrique <- "unitSp" 731 }
792 }else{} 732
793 733 cas_tables <- c("unit_sp" = "unit_sp",
794 casTables <- c("unitSp"="unitSp", 734 "TablePresAbs" = "unit_sp",
795 "TablePresAbs"="unitSp", 735 "unit_sp_sz" = "unit_sp_sz")
796 "unitSpSz"="unitSpSz")
797 736
798 ## Recuperation of metrics table : 737 ## Recuperation of metrics table :
799 dataMetrique <- tabMetrics 738 data_metric <- tab_metrics
800 unitobs <- tabUnitobs 739 unitobs <- tab_unitobs
801 refesp <- refesp 740 refesp <- refesp
802 741
803 ## If no metrics available or already computed : 742 ## If no metrics available or already computed :
804 if (is.element(metrique, c("", "occurrence.frequency"))) 743 if (is.element(metrique, c("", "occurrence.frequency"))) {
805 {
806 metrique <- "tmp" 744 metrique <- "tmp"
807 dataMetrique$tmp <- 0 745 data_metric$tmp <- 0
808 dataMetrique$tmp[dataMetrique[ , nbName] > 0] <- 1 746 data_metric$tmp[data_metric[, nb_name] > 0] <- 1
809 }else{} 747 }
810 748
811 if (!is.null(add)) 749 if (!is.null(add)) {
812 { 750 metriques <- c(metrique, add[is.element(add, colnames(data_metric))])
813 metriques <- c(metrique, add[is.element(add, colnames(dataMetrique))])
814 }else{ 751 }else{
815 metriques <- metrique 752 metriques <- metrique
816 } 753 }
817 754
818 ## Subset depending on metrics table 755 ## Subset depending on metrics table
819 switch(casTables[tableMetrique], 756 switch(cas_tables[tab_metrique],
820 ## Observation table by unitobs and species : 757 ## Observation table by unitobs and species :
821 unitSp={ 758 unit_sp = {
822 restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , metriques, drop=FALSE], 759 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), metriques, drop = FALSE],
823 unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])], 760 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])],
824 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs 761 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs
825 facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE], 762 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE],
826 refesp[match(dataMetrique$species.code[!is.na(dataMetrique[ , metrique])], 763 refesp[match(data_metric$species.code[!is.na(data_metric[, metrique])],
827 refesp$species.code), # ajout des colonnes sélectionnées d'especes 764 refesp$species.code), # ajout des colonnes sélectionnées d'especes
828 facteurs[is.element(facteurs, colnames(refesp))], drop=FALSE]) 765 facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE])
829 }, 766 },
830 ## Observation table by unitobs, species and size class : 767 ## Observation table by unitobs, species and size class :
831 unitSpSz={ 768 unit_sp_sz = {
832 restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , 769 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]),
833 c(metriques, "size.class"), drop=FALSE], 770 c(metriques, "size.class"), drop = FALSE],
834 unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])], 771 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])],
835 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs 772 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs
836 facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE], 773 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE],
837 refesp[match(dataMetrique$species.code[!is.na(dataMetrique[ , metrique])], 774 refesp[match(data_metric$species.code[!is.na(data_metric[, metrique])],
838 refesp$species.code), # ajout des colonnes sélectionnées d'especes 775 refesp$species.code), # ajout des colonnes sélectionnées d'especes
839 facteurs[is.element(facteurs, colnames(refesp))], drop=FALSE]) 776 facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE])
840 }, 777 },
841 ## Other cases : 778 ## Other cases :
842 restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , metriques, drop=FALSE], 779 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), metriques, drop = FALSE],
843 unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])], 780 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])],
844 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs. 781 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs.
845 facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE]) 782 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE])
846 ) 783 )
847 784
848 selCol <- which(!is.na(selections)) 785 sel_col <- which(!is.na(selections))
849 if (!is.null(exclude)) 786 if (!is.null(exclude)) {
850 { 787 sel_col <- sel_col[sel_col != exclude]
851 selCol <- selCol[selCol != exclude]
852 } 788 }
853 789
854 ## Particular case of size classes : 790 ## Particular case of size classes :
855 if (is.element("size.class", colnames(restmp))) 791 if (is.element("size.class", colnames(restmp))) {
856 { 792 if (length(grep("^[[:digit:]]*[-_][[:digit:]]*$", unique(as.character(restmp$size.class)), perl = TRUE)) ==
857 if (length(grep("^[[:digit:]]*[-_][[:digit:]]*$", unique(as.character(restmp$size.class)), perl=TRUE)) == 793 length(unique(as.character(restmp$size.class)))) {
858 length(unique(as.character(restmp$size.class)))) 794 restmp[, "size.class"] <-
859 {
860 restmp$size.class <-
861 factor(as.character(restmp$size.class), 795 factor(as.character(restmp$size.class),
862 levels=unique(as.character(restmp$size.class))[ 796 levels = unique(as.character(restmp$size.class))[
863 order(as.numeric(sub("^([[:digit:]]*)[-_][[:digit:]]*$", 797 order(as.numeric(sub("^([[:digit:]]*)[-_][[:digit:]]*$",
864 "\\1", 798 "\\1",
865 unique(as.character(restmp$size.class)), 799 unique(as.character(restmp$size.class)),
866 perl=TRUE)), 800 perl = TRUE)),
867 na.last=FALSE)]) 801 na.last = FALSE)])
868 }else{ 802 }else{
869 restmp$size.class <- factor(restmp$size.class) 803 restmp[, "size.class"] <- factor(restmp$size.class)
870 } 804 }
871 }else{} 805 }
872 806
873 ## Biomass and density conversion -> /100m² : 807 ## Biomass and density conversion -> /100m² :
874 if (any(is.element(colnames(restmp), c("biomass", "density", 808 if (any(is.element(colnames(restmp), c("biomass", "density",
875 "biomass.max", "density.max", 809 "biomass.max", "density.max",
876 "biomass.sd", "density.sd"))) && ObsType != "fishing") 810 "biomass.sd", "density.sd"))) && obs_type != "fishing") {
877 { 811 restmp[, is.element(colnames(restmp),
878 restmp[ , is.element(colnames(restmp),
879 c("biomass", "density", 812 c("biomass", "density",
880 "biomass.max", "density.max", 813 "biomass.max", "density.max",
881 "biomass.sd", "density.sd"))] <- 100 * 814 "biomass.sd", "density.sd"))] <- 100 *
882 restmp[, is.element(colnames(restmp), 815 restmp[, is.element(colnames(restmp),
883 c("biomass", "density", 816 c("biomass", "density",
884 "biomass.max", "density.max", 817 "biomass.max", "density.max",
885 "biomass.sd", "density.sd"))] 818 "biomass.sd", "density.sd"))]
886 }else{} 819 }
887 820
888 return(restmp) 821 return(restmp)
889 } 822 }
890 823
891 ######################################### end of the function subsetToutesTables.f 824 ######################################### end of the function subset_all_tables_f
892 825
893 826 ######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r
894 ######################################### start of the function sortiesLM.f called by modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r 827
895 sortiesLM.f <- function(objLM, TabSum, #formule, 828 organise_fact <- function(list_rand, list_fact) {
896 metrique, factAna, cut, colAna, listFact, lev = NULL, Data, 829 ## Purpose: organise response factors
897 Log=FALSE, sufixe=NULL, type="espece") 830 ## ----------------------------------------------------------------------
898 { 831 ## Arguments: list_rand : Analysis random factors list
832 ## list_fact : Analysis factors list
833 ## ----------------------------------------------------------------------
834 ## Author: Coline ROYAUX 14 november 2020
835
836 if (list_rand[1] != "None") {
837 if (all(is.element(list_fact, list_rand)) || list_fact[1] == "None") {
838 resp_fact <- paste("(1|", paste(list_rand, collapse = ") + (1|"), ")")
839 list_f <- NULL
840 list_fact <- list_rand
841 }else{
842 list_f <- list_fact[!is.element(list_fact, list_rand)]
843 resp_fact <- paste(paste(list_f, collapse = " + "), " + (1|", paste(list_rand, collapse = ") + (1|"), ")")
844 list_fact <- c(list_f, list_rand)
845 }
846 }else{
847 list_f <- list_fact
848 resp_fact <- paste(list_fact, collapse = " + ")
849 }
850 return(list(resp_fact, list_f, list_fact))
851 }
852
853 ######################################### end of the function organise_fact
854
855 ######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r
856 distrib_choice <- function(distrib = distrib, metrique = metrique, data = tmpd_ata) {
857 ## Purpose: choose the right distribution
858 ## ----------------------------------------------------------------------
859 ## Arguments: data : data used for analysis
860 ## metrique : Chosen metric
861 ## distrib : distribution law selected by user
862 ## ----------------------------------------------------------------------
863 ## Author: Coline ROYAUX 14 november 2020
864
865 if (distrib == "None") {
866 if (metrique == "presence_absence") {
867 chose_distrib <- "binomial"
868 }else{
869 switch(class(data[, metrique]),
870 "integer" = {
871 chose_distrib <- "poisson"
872 },
873 "numeric" = {
874 chose_distrib <- "gaussian"
875 },
876 stop("Selected metric class doesn't fit, you should select an integer or a numeric variable"))
877 }
878 }else{
879 chose_distrib <- distrib
880 }
881 return(chose_distrib)
882 }
883
884 ######################################### end of the function organise_fact
885
886 ######################################### start of the function create_res_table called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r
887 create_res_table <- function(list_rand, list_fact, row, lev, distrib) {
888 ## Purpose: create results table
889 ## ----------------------------------------------------------------------
890 ## Arguments: list_rand : Analysis random factors list
891 ## list_fact : Analysis factors list
892 ## row : rows of results table = species or separation factor
893 ## lev : Levels of analysis factors list
894 ## distrib : distribution law
895 ## ----------------------------------------------------------------------
896 ## Author: Coline ROYAUX 04 october 2020
897
898 if (list_rand[1] != "None") { ## if random effects
899 tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, BIC = NA, logLik = NA, deviance = NA, df.resid = NA)
900 colrand <- unlist(lapply(list_rand,
901 FUN = function(x) {
902 lapply(c("Std.Dev", "NbObservation", "NbLevels"),
903 FUN = function(y) {
904 paste(x, y, collapse = ":")
905 })
906 }))
907 tab_sum[, colrand] <- NA
908
909 if (! is.null(lev)) { ## if fixed effects + random effects
910 colcoef <- unlist(lapply(c("(Intercept)", lev),
911 FUN = function(x) {
912 lapply(c("Estimate", "Std.Err", "Zvalue", "Pvalue", "IC_up", "IC_inf", "signif"),
913 FUN = function(y) {
914 paste(x, y, collapse = ":")
915 })
916 }))
917
918 }else{ ## if no fixed effects
919 colcoef <- NULL
920 }
921
922 }else{ ## if no random effects
923 tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, Resid.deviance = NA, df.resid = NA, Null.deviance = NA, df.null = NA)
924
925 switch(distrib,
926 "gaussian" = {
927 colcoef <- unlist(lapply(c("(Intercept)", lev),
928 FUN = function(x) {
929 lapply(c("Estimate", "Std.Err", "Tvalue", "Pvalue", "IC_up", "IC_inf", "signif"),
930 FUN = function(y) {
931 paste(x, y, collapse = ":")
932 })
933 }))
934
935 },
936 "quasipoisson" = {
937 colcoef <- unlist(lapply(c("(Intercept)", lev),
938 FUN = function(x) {
939 lapply(c("Estimate", "Std.Err", "Tvalue", "Pvalue", "IC_up", "IC_inf", "signif"),
940 FUN = function(y) {
941 paste(x, y, collapse = ":")
942 })
943 }))
944
945 }
946 , {
947 colcoef <- unlist(lapply(c("(Intercept)", lev),
948 FUN = function(x) {
949 lapply(c("Estimate", "Std.Err", "Zvalue", "Pvalue", "IC_up", "IC_inf", "signif"),
950 FUN = function(y) {
951 paste(x, y, collapse = ":")
952 })
953 }))
954 })
955
956 }
957
958 tab_sum[, colcoef] <- NA
959
960
961 return(tab_sum)
962 }
963 ######################################### end of the function create_res_table
964
965 ######################################### start of the function sorties_lm_f called by glm_community in FunctExeCalcGLMGalaxy.r
966 sorties_lm_f <- function(obj_lm, obj_lmy, tab_sum, #formule,
967 metrique, fact_ana, cut, col_ana, list_fact, list_rand, lev = NULL, d_ata,
968 log = FALSE, sufixe = NULL) {
899 ## Purpose: Form GLM and LM results 969 ## Purpose: Form GLM and LM results
900 ## ---------------------------------------------------------------------- 970 ## ----------------------------------------------------------------------
901 ## Arguments: objLM : lm object 971 ## Arguments: obj_lm : lm object
902 ## TabSum : output summary table 972 ## obj_lmy : lm object with year as continuous
973 ## tab_sum : output summary table
903 ## formule : LM formula 974 ## formule : LM formula
904 ## metrique : Chosen metric 975 ## metrique : Chosen metric
905 ## factAna : separation factor 976 ## fact_ana : separation factor
906 ## cut : level of separation factor 977 ## cut : level of separation factor
907 ## colAna : colname for separation factor in output summary table 978 ## col_ana : colname for separation factor in output summary table
908 ## listFact : Analysis factors list 979 ## list_fact : Analysis factors list
980 ## list_rand : Analysis random factors list
909 ## levels : Levels of analysis factors list 981 ## levels : Levels of analysis factors list
910 ## Data : Data used for analysis 982 ## d_ata : d_ata used for analysis
911 ## Log : put log on metric ? (boolean) 983 ## log : put log on metric ? (boolean)
912 ## sufixe : sufix for file name 984 ## sufixe : sufix for file name
913 ## type : analysis type
914 ## ---------------------------------------------------------------------- 985 ## ----------------------------------------------------------------------
915 ## Author: Yves Reecht, Date: 25 août 2010, 16:19 modified by Coline ROYAUX 04 june 2020 986 ## Author: Yves Reecht, Date: 25 août 2010, 16:19 modified by Coline ROYAUX 04 june 2020
916 987
917 sumLM <- summary(objLM) 988 tab_sum[, "Interest.var"] <- as.character(metrique)
918 if (length(grep("^glmmTMB", objLM$call)) > 0) #if random effects 989 sum_lm <- summary(obj_lm)
919 { 990 tab_sum[, "distribution"] <- as.character(sum_lm$family[1])
920 TabSum[TabSum[,colAna]==cut,"AIC"] <- sumLM$AICtab[1] 991
921 TabSum[TabSum[,colAna]==cut,"BIC"] <- sumLM$AICtab[2] 992 if (length(grep("^glmmTMB", obj_lm$call)) > 0) { #if random effects
922 TabSum[TabSum[,colAna]==cut,"logLik"] <- sumLM$AICtab[3] 993 tab_sum[tab_sum[, col_ana] == cut, "AIC"] <- sum_lm$AICtab[1]
923 TabSum[TabSum[,colAna]==cut,"deviance"] <- sumLM$AICtab[4] 994 tab_sum[tab_sum[, col_ana] == cut, "BIC"] <- sum_lm$AICtab[2]
924 TabSum[TabSum[,colAna]==cut,"df.resid"] <- sumLM$AICtab[5] 995 tab_sum[tab_sum[, col_ana] == cut, "logLik"] <- sum_lm$AICtab[3]
925 996 tab_sum[tab_sum[, col_ana] == cut, "deviance"] <- sum_lm$AICtab[4]
926 if (! is.null(lev)) ## if fixed effects + random effects 997 tab_sum[tab_sum[, col_ana] == cut, "df.resid"] <- sum_lm$AICtab[5]
927 { 998
928 TabCoef <- as.data.frame(sumLM$coefficients$cond) 999 if (! is.null(lev)) { ## if fixed effects + random effects
929 TabCoef$signif <- lapply(TabCoef[,"Pr(>|z|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}}) 1000 tab_coef <- as.data.frame(sum_lm$coefficients$cond)
930 1001 tab_coef$signif <- lapply(tab_coef[, "Pr(>|z|)"], FUN = function(x) {
931 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Zvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"z value"] 1002 if (!is.na(x) && x < 0.05) {
932 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|z|)"] 1003 "yes"
933 1004 }else{
934 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Zvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"z value"]}else{NA}})) 1005 "no"
935 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|z|)"]}else{NA}})) 1006 }
936 }else{} 1007 })
937 1008
938 switch(as.character(length(sumLM$varcor$cond)), 1009 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Zvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "z value"]
939 "1"={StdD <- c(sumLM$varcor$cond[[1]])}, 1010 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|z|)"]
940 "2"={StdD <- c(sumLM$varcor$cond[[1]],sumLM$varcor$cond[[2]])}, 1011
941 StdD <- NULL) 1012 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Zvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
942 TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"Std.Dev",collapse="|"),colnames(TabSum))] <- StdD 1013 if (length(grep(x, rownames(tab_coef))) > 0) {
943 TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"NbObservation",collapse="|"),colnames(TabSum))] <- sumLM$nobs 1014 tab_coef[grepl(x, rownames(tab_coef)), "z value"]
944 TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"NbLevels",collapse="|"),colnames(TabSum))] <- unlist(lapply(listRand,FUN=function(x){nlevels(Data[,x])})) 1015 }else{
945 1016 NA
1017 }
1018 }))
1019 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
1020 if (length(grep(x, rownames(tab_coef))) > 0) {
1021 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|z|)"]
1022 }else{
1023 NA
1024 }
1025 }))
1026
1027 if (any(obj_lmy != "")) {
1028 sum_lmy <- summary(obj_lmy)
1029 tab_coefy <- as.data.frame(sum_lmy$coefficients$cond)
1030 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|z|)"], FUN = function(x) {
1031 if (!is.na(x) && x < 0.05) {
1032 "yes"
1033 }else{
1034 "no"
1035 }
1036 })
1037 tab_sum[tab_sum[, col_ana] == cut, "year Zvalue"] <- ifelse(length(tab_coefy["year", "z value"]) > 0, tab_coefy["year", "z value"], NA)
1038 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|z|)"], NA)
1039 }
1040
1041 }
1042
1043 switch(as.character(length(sum_lm$varcor$cond)),
1044 "1" = {
1045 std_d <- c(sum_lm$varcor$cond[[1]])
1046 },
1047 "2" = {
1048 std_d <- c(sum_lm$varcor$cond[[1]], sum_lm$varcor$cond[[2]])
1049 },
1050 std_d <- NULL)
1051
1052 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "Std.Dev", collapse = "|"), colnames(tab_sum))] <- std_d
1053 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "NbObservation", collapse = "|"), colnames(tab_sum))] <- sum_lm$nobs
1054 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "NbLevels", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(list_rand, FUN = function(x) {
1055 nlevels(d_ata[, x])
1056 }))
1057
946 }else{ ## if fixed effects only 1058 }else{ ## if fixed effects only
947 1059
948 TabSum[TabSum[,colAna]==cut,"AIC"] <- sumLM$aic 1060 tab_sum[tab_sum[, col_ana] == cut, "AIC"] <- sum_lm$aic
949 TabSum[TabSum[,colAna]==cut,"Resid.deviance"] <- sumLM$deviance 1061 tab_sum[tab_sum[, col_ana] == cut, "Resid.deviance"] <- sum_lm$deviance
950 TabSum[TabSum[,colAna]==cut,"df.resid"] <- sumLM$df.residual 1062 tab_sum[tab_sum[, col_ana] == cut, "df.resid"] <- sum_lm$df.residual
951 TabSum[TabSum[,colAna]==cut,"Null.deviance"] <- sumLM$null.deviance 1063 tab_sum[tab_sum[, col_ana] == cut, "Null.deviance"] <- sum_lm$null.deviance
952 TabSum[TabSum[,colAna]==cut,"df.null"] <- sumLM$df.null 1064 tab_sum[tab_sum[, col_ana] == cut, "df.null"] <- sum_lm$df.null
953 TabCoef <- as.data.frame(sumLM$coefficients) 1065 tab_coef <- as.data.frame(sum_lm$coefficients)
954 1066
955 if (sumLM$family[1] == "gaussian" || sumLM$family[1] == "quasipoisson") 1067 if (any(obj_lmy != "")) {
956 { 1068 sum_lmy <- summary(obj_lmy)
957 1069 tab_coefy <- as.data.frame(sum_lmy$coefficients)
958 TabCoef$signif <- lapply(TabCoef[,"Pr(>|t|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}}) 1070 }
959 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Tvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"t value"] 1071
960 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|t|)"] 1072 if (sum_lm$family[1] == "gaussian" || sum_lm$family[1] == "quasipoisson") {
961 1073
962 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Tvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"t value"]}else{NA}})) 1074 tab_coef$signif <- lapply(tab_coef[, "Pr(>|t|)"], FUN = function(x) {
963 1075 if (!is.na(x) && x < 0.05) {
964 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|t|)"]}else{NA}})) 1076 "yes"
965 }else{ 1077 }else{
966 TabCoef$signif <- lapply(TabCoef[,"Pr(>|z|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}}) 1078 "no"
967 1079 }
968 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Zvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"z value"] 1080 })
969 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|z|)"] 1081 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Tvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "t value"]
970 1082 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|t|)"]
971 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Zvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"z value"]}else{NA}})) 1083
972 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|z|)"]}else{NA}})) 1084 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Tvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
973 } 1085 if (length(grep(x, rownames(tab_coef))) > 0) {
974 } 1086 tab_coef[grepl(x, rownames(tab_coef)), "t value"]
975 1087 }else{
976 if (! is.null(lev)) ## if fixed effects 1088 NA
977 { 1089 }
978 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Estimate",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Estimate"] 1090 }))
979 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Std.Err",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Std. Error"] 1091
980 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*signif",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"signif"] 1092 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
981 1093 if (length(grep(x, rownames(tab_coef))) > 0) {
982 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Estimate",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Estimate"]}else{NA}})) 1094 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|t|)"]
983 1095 }else{
984 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Std.Err",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Std. Error"]}else{NA}})) 1096 NA
985 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"signif",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"signif"]}else{NA}})) 1097 }
986 }else{} 1098 }))
987 1099
988 return(TabSum) 1100 if (any(obj_lmy != "")) {
989 1101 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|t|)"], FUN = function(x) {
990 } 1102 if (!is.na(x) && x < 0.05) {
991 1103 "yes"
992 1104 }else{
993 ######################################### end of the function sortiesLM.f 1105 "no"
994 1106 }
995 ######################################### start of the function graphTitle.f called by sortiesLM.f 1107 })
996 1108 tab_sum[tab_sum[, col_ana] == cut, "year Tvalue"] <- ifelse(length(tab_coefy["year", "t value"]) > 0, tab_coefy["year", "t value"], NA)
997 graphTitle.f <- function(metrique, modGraphSel, factGraph, listFact, model=NULL, type="espece", 1109 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|t|)"], NA)
998 lang = getOption("P.lang")) 1110 }
999 { 1111
1000 ## Purpose: Automatically write a name for a graph 1112 }else{
1001 ## ---------------------------------------------------------------------- 1113 tab_coef$signif <- lapply(tab_coef[, "Pr(>|z|)"], FUN = function(x) {
1002 ## Arguments: 1114 if (!is.na(x) && x < 0.05) {
1003 ## ---------------------------------------------------------------------- 1115 "yes"
1004 ## Author: Yves Reecht, Date: 14 oct. 2010, 15:44 modified by Coline ROYAUX 04 june 2020 1116 }else{
1005 return(paste(ifelse(is.null(model), 1117 "no"
1006 "Values of ", 1118 }
1007 paste(model, 1119 })
1008 " for", 1120
1009 sep="")), 1121 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Zvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "z value"]
1010 metrique, 1122 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|z|)"]
1011 ifelse(is.element(type, c("espece", "unitobs", "CL_espece", "unitobs(CL)")), 1123
1012 paste("aggregated"), 1124 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Zvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
1013 ""), 1125 if (length(grep(x, rownames(tab_coef))) > 0) {
1014 switch(type, 1126 tab_coef[grepl(x, rownames(tab_coef)), "z value"]
1015 "espece"=" per species and station", 1127 }else{
1016 "CL_espece"=" per size class, species and station", 1128 NA
1017 "unitobs"=" per station", 1129 }
1018 "unitobs(CL)"=" per station", 1130 }))
1019 "CL_unitobs"=" per size class and station", 1131 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
1020 "biodiv"=" per station", 1132 if (length(grep(x, rownames(tab_coef))) > 0) {
1021 ""), 1133 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|z|)"]
1022 switch(type, 1134 }else{
1023 "espece"={ 1135 NA
1024 ifelse(modGraphSel == "", # Only separation factor if defined 1136 }
1025 "", 1137 }))
1026 paste("\nfor the field", 1138
1027 " '", factGraph, "' = ", modGraphSel, sep="")) 1139 if (any(obj_lmy != "")) {
1028 }, 1140 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|z|)"], FUN = function(x) {
1029 "CL_espece"={ 1141 if (!is.na(x) && x < 0.05) {
1030 ifelse(modGraphSel == "", # Only separation factor if defined 1142 "yes"
1031 "", 1143 }else{
1032 paste("\nfor the field", 1144 "no"
1033 " '", factGraph, "' = ", modGraphSel, sep="")) 1145 }
1034 }, 1146 })
1035 "unitobs"={ 1147
1036 ifelse(modGraphSel[1] == "", # Only separation factor if defined 1148 tab_sum[tab_sum[, col_ana] == cut, "year Zvalue"] <- ifelse(length(tab_coefy["year", "z value"]) > 0, tab_coefy["year", "z value"], NA)
1037 "\nfor all species", 1149 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|z|)"], NA)
1038 paste("\nfor all species matching", 1150 }
1039 " '", factGraph, "' = (", 1151 }
1040 paste(modGraphSel, collapse=", "), ")", sep="")) 1152 }
1041 }, 1153
1042 "unitobs(CL)"={ 1154 if (! is.null(lev)) { ## if fixed effects
1043 ifelse(modGraphSel[1] == "", # Only separation factor if defined 1155 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Estimate", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Estimate"]
1044 "\nfor all size classes", 1156 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Std.Err", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Std. Error"]
1045 paste("\nfor size classes matching", 1157 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*signif", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "signif"]
1046 " '", factGraph, "' = (", 1158
1047 paste(modGraphSel, collapse=", "), ")", sep="")) 1159 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Estimate", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
1048 }, 1160 if (length(grep(x, rownames(tab_coef))) > 0) {
1049 "CL_unitobs"={ 1161 tab_coef[grepl(x, rownames(tab_coef)), "Estimate"]
1050 ifelse(modGraphSel[1] == "", # Only separation factor if defined 1162 }else{
1051 "\nfor all species", 1163 NA
1052 paste("\nfor all species matching", 1164 }
1053 " '", factGraph, "' = (", 1165 }))
1054 paste(modGraphSel, collapse=", "), ")", sep="")) 1166 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Std.Err", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
1055 }, 1167 if (length(grep(x, rownames(tab_coef))) > 0) {
1056 "biodiv"={ 1168 tab_coef[grepl(x, rownames(tab_coef)), "Std. Error"]
1057 ifelse(modGraphSel[1] == "", # Only separation factor if defined 1169 }else{
1058 "", 1170 NA
1059 paste("\nfor stations matching", 1171 }
1060 " '", factGraph, "' = (", 1172 }))
1061 paste(modGraphSel, collapse=", "), ")", sep="")) 1173 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "signif", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
1062 }, 1174 if (length(grep(x, rownames(tab_coef))) > 0) {
1063 ""), 1175 tab_coef[grepl(x, rownames(tab_coef)), "signif"]
1064 "\n by ", 1176 }else{
1065 paste(sapply(listFact[length(listFact):1], 1177 NA
1066 function(x)paste(c(## varNames.f(x, "article"), 1178 }
1067 "", 1179 }))
1068 x, collapse="")), 1180
1069 collapse=" and"), 1181 if (any(obj_lmy != "")) {
1070 "\n", sep=""))) 1182 tab_sum[tab_sum[, col_ana] == cut, "year Estimate"] <- ifelse(length(tab_coefy["year", "Estimate"]) > 0, tab_coefy["year", "Estimate"], NA)
1071 } 1183 tab_sum[tab_sum[, col_ana] == cut, "year Std.Err"] <- ifelse(length(tab_coefy["year", "Std. Error"]) > 0, tab_coefy["year", "Std. Error"], NA)
1072 1184 tab_sum[tab_sum[, col_ana] == cut, "year signif"] <- ifelse(length(tab_coefy["year", "signif"]) > 0, tab_coefy["year", "signif"], NA)
1073 ######################################### end of the function graphTitle.f 1185 }
1074 1186
1075 ######################################### start of the function noteGLM.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f 1187 }
1076 1188
1077 noteGLM.f <- function(data, objLM, metric, listFact, details = FALSE) 1189 ic <- tryCatch(as.data.frame(confint(obj_lm)), error = function(e) {
1078 { 1190 })
1191
1192 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "IC_up", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
1193 if (length(grep(x, rownames(ic))) > 0) {
1194 ic[grepl(x, rownames(ic)), "97.5 %"]
1195 }else{
1196 NA
1197 }
1198 }))
1199 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "IC_inf", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) {
1200 if (length(grep(x, rownames(ic))) > 0) {
1201 ic[grepl(x, rownames(ic)), "2.5 %"]
1202 }else{
1203 NA
1204 }
1205 }))
1206
1207 return(tab_sum)
1208
1209 }
1210
1211
1212 ######################################### end of the function sorties_lm_f
1213
1214
1215 ######################################### start of the function note_glm_f called by glm_species and glm_community
1216
1217 note_glm_f <- function(data, obj_lm, metric, list_fact, details = FALSE) {
1079 ## Purpose: Note your GLM analysis 1218 ## Purpose: Note your GLM analysis
1080 ## ---------------------------------------------------------------------- 1219 ## ----------------------------------------------------------------------
1081 ## Arguments: data : Dataframe used for analysis 1220 ## Arguments: data : d_ataframe used for analysis
1082 ## objLM : GLM assessed 1221 ## obj_lm : GLM assessed
1083 ## metric : selected metric 1222 ## metric : selected metric
1084 ## listFact : Analysis factors list 1223 ## list_fact : Analysis factors list
1224 ## details : detailed output ?
1085 ## ---------------------------------------------------------------------- 1225 ## ----------------------------------------------------------------------
1086 ## Author: Coline ROYAUX, 26 june 2020 1226 ## Author: Coline ROYAUX, 26 june 2020
1087 1227
1088 rate <- 0 1228 rate <- 0
1089 detres <- list(complete_plan=NA, balanced_plan=NA, NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA) 1229 detres <- list(complete_plan = NA, balanced_plan = NA, NA_proportion_OK = NA, no_residual_dispersion = NA, uniform_residuals = NA, outliers_proportion_OK = NA, no_zero_inflation = NA, observation_factor_ratio_OK = NA, enough_levels_random_effect = NA, rate = NA)
1090 1230
1091 #### Data criterions #### 1231 #### d_ata criterions ####
1092 1232
1093 ## Plan 1233 ## Plan
1094 1234
1095 plan <- as.data.frame(table(data[,listFact])) 1235 plan <- as.data.frame(table(data[, list_fact]))
1096 1236
1097 if (nrow(plan[plan$Freq==0,]) < nrow(plan)*0.1) # +0.5 if less than 10% of possible factor's level combinations aren't represented in the sampling scheme 1237 if (nrow(plan[plan$Freq == 0, ]) < nrow(plan) * 0.1) { # +0.5 if less than 10% of possible factor's level combinations aren't represented in the sampling scheme
1098 { 1238 rate <- rate + 0.5
1099 rate <- rate + 0.5
1100 detres$complete_plan <- TRUE 1239 detres$complete_plan <- TRUE
1101 1240
1102 if (summary(as.factor(plan$Freq))[1] > nrow(plan)*0.9) # +0.5 if the frequency of the most represented frequency of possible factor's levels combinations is superior to 90% of the total number of possible factor's levels combinations 1241 if (summary(as.factor(plan$Freq))[1] > nrow(plan) * 0.9) { # +0.5 if the frequency of the most represented frequency of possible factor's levels combinations is superior to 90% of the total number of possible factor's levels combinations
1103 {
1104 rate <- rate + 0.5 1242 rate <- rate + 0.5
1105 detres$balanced_plan <- TRUE 1243 detres$balanced_plan <- TRUE
1106 }else{} 1244 }
1107 1245
1108 }else{ 1246 }else{
1109 detres$complete_plan <- FALSE 1247 detres$complete_plan <- FALSE
1110 detres$balanced_plan <- FALSE 1248 detres$balanced_plan <- FALSE
1111 } 1249 }
1112 1250
1113 if (nrow(data) - nrow(na.omit(data)) < nrow(data)*0.1) # +1 if less than 10% of the lines in the dataframe bares a NA 1251 if (nrow(data) - nrow(na.omit(data)) < nrow(data) * 0.1) { # +1 if less than 10% of the lines in the dataframe bares a NA
1114 {
1115 rate <- rate + 1 1252 rate <- rate + 1
1116 detres$NA_proportion_OK <- TRUE 1253 detres["NA_proportion_OK"] <- TRUE
1117 }else{ 1254 }else{
1118 detres$NA_proportion_OK <- FALSE 1255 detres["NA_proportion_OK"] <- FALSE
1119 } 1256 }
1120 1257
1121 #### Model criterions #### 1258 #### Model criterions ####
1122 1259
1123 if (length(grep("quasi",objLM$family)) == 0) #DHARMa doesn't work with quasi distributions 1260 if (length(grep("quasi", obj_lm$family)) == 0) { #DHARMa doesn't work with quasi distributions
1124 { 1261
1125 1262 residuals <- DHARMa::simulateResiduals(obj_lm)
1126 Residuals <- simulateResiduals(objLM) 1263
1127 1264 capture.output(test_res <- DHARMa::testResiduals(residuals))
1128 capture.output(testRes <- testResiduals(Residuals)) 1265 test_zero <- DHARMa::testZeroInflation(residuals)
1129 testZero <- testZeroInflation(Residuals)
1130 1266
1131 ## dispersion of residuals 1267 ## dispersion of residuals
1132 1268
1133 if (testRes$dispersion$p.value > 0.05) # +1.5 if dispersion tests not significative 1269 if (test_res$dispersion$p.value > 0.05) { # +1.5 if dispersion tests not significative
1134 {
1135 rate <- rate + 1.5 1270 rate <- rate + 1.5
1136 detres$no_residual_dispersion <- TRUE 1271 detres$no_residual_dispersion <- TRUE
1137 }else{ 1272 }else{
1138 detres$no_residual_dispersion <- FALSE 1273 detres$no_residual_dispersion <- FALSE
1139 } 1274 }
1140 1275
1141 ## uniformity of residuals 1276 ## uniformity of residuals
1142 1277
1143 if (testRes$uniformity$p.value > 0.05) # +1 if uniformity tests not significative 1278 if (test_res$uniformity$p.value > 0.05) { # +1 if uniformity tests not significative
1144 { 1279 rate <- rate + 1
1145 rate <- rate + 1.5
1146 detres$uniform_residuals <- TRUE 1280 detres$uniform_residuals <- TRUE
1147 }else{ 1281 }else{
1148 detres$uniform_residuals <- FALSE 1282 detres$uniform_residuals <- FALSE
1149 } 1283 }
1150 1284
1151 ## residuals outliers 1285 ## residuals outliers
1152 1286
1153 if (testRes$outliers$p.value > 0.05) # +0.5 if outliers tests not significative 1287 if (test_res$outliers$p.value > 0.05) { # +0.5 if outliers tests not significative
1154 {
1155 rate <- rate + 0.5 1288 rate <- rate + 0.5
1156 detres$outliers_proportion_OK <- TRUE 1289 detres["outliers_proportion_OK"] <- TRUE
1157 }else{ 1290 }else{
1158 detres$outliers_proportion_OK <- FALSE 1291 detres["outliers_proportion_OK"] <- FALSE
1159 } 1292 }
1160 1293
1161 ## Zero inflation test 1294 ## Zero inflation test
1162 1295
1163 if (testZero$p.value > 0.05) # +1 if zero inflation tests not significative 1296 if (test_zero$p.value > 0.05) { # +1 if zero inflation tests not significative
1164 { 1297 rate <- rate + 1
1165 rate <- rate + 1.5
1166 detres$no_zero_inflation <- TRUE 1298 detres$no_zero_inflation <- TRUE
1167 }else{ 1299 }else{
1168 detres$no_zero_inflation <- FALSE 1300 detres$no_zero_inflation <- FALSE
1169 } 1301 }
1170 1302
1171 ## Factors/observations ratio 1303 ## Factors/observations ratio
1172 1304
1173 if (length(listFact)/nrow(na.omit(data)) < 0.1) # +1 if quantity of factors is less than 10% of the quantity of observations 1305 if (length(list_fact) / nrow(na.omit(data)) < 0.1) { # +1 if quantity of factors is less than 10% of the quantity of observations
1174 {
1175 rate <- rate + 1 1306 rate <- rate + 1
1176 detres$observation_factor_ratio_OK <- TRUE 1307 detres["observation_factor_ratio_OK"] <- TRUE
1177 }else{ 1308 }else{
1178 detres$observation_factor_ratio_OK <- FALSE 1309 detres["observation_factor_ratio_OK"] <- FALSE
1179 } 1310 }
1180 1311
1181 ## less than 10 factors' level on random effect 1312 ## less than 10 factors' level on random effect
1182 1313
1183 if (length(grep("^glmmTMB", objLM$call)) > 0) 1314 if (length(grep("^glmmTMB", obj_lm$call)) > 0) {
1184 { 1315 nlev_rand <- c()
1185 nlevRand <- c() 1316 for (fact in names(summary(obj_lm)$varcor$cond)) {
1186 for(fact in names(summary(objLM)$varcor$cond)) 1317 nlev_rand <- c(nlev_rand, length(unlist(unique(data[, fact]))))
1187 {
1188 nlevRand <- c(nlevRand,length(unlist(unique(data[,fact]))))
1189 } 1318 }
1190 1319
1191 if (all(nlevRand > 10)) # +1 if more than 10 levels in one random effect 1320 if (all(nlev_rand > 10)) { # +1 if more than 10 levels in one random effect
1192 {
1193 rate <- rate + 1 1321 rate <- rate + 1
1194 detres$enough_levels_random_effect <- TRUE 1322 detres$enough_levels_random_effect <- TRUE
1195 }else{ 1323 }else{
1196 detres$enough_levels_random_effect <- FALSE 1324 detres$enough_levels_random_effect <- FALSE
1197 } 1325 }
1198 }else{} 1326 }
1199 1327
1200 detres$rate <- rate 1328 detres$rate <- rate
1201 1329
1202 if (details) 1330 if (details) {
1203 { 1331 return(detres)
1204 return(detres)
1205 }else{ 1332 }else{
1206 return(rate) 1333 return(rate)
1207 } 1334 }
1208 1335
1209 }else{ 1336 }else{
1210 return(NA) 1337 return(NA)
1211 cat("Models with quasi distributions can't be rated for now") 1338 cat("Models with quasi distributions can't be rated for now")
1212 } 1339 }
1213 } 1340 }
1214 1341
1215 ######################################### end of the function noteGLM.f 1342 ######################################### end of the function note_glm_f
1216 1343
1217 ######################################### start of the function noteGLMs.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f 1344 ######################################### start of the function note_glms_f called by glm_species and glm_community
1218 1345
1219 noteGLMs.f <- function(tabRate, exprML, objLM, file_out=FALSE) 1346 note_glms_f <- function(tab_rate, expr_lm, obj_lm, file_out = FALSE) {
1220 {
1221 ## Purpose: Note your GLM analysis 1347 ## Purpose: Note your GLM analysis
1222 ## ---------------------------------------------------------------------- 1348 ## ----------------------------------------------------------------------
1223 ## Arguments: data : rates table from noteGLM.f 1349 ## Arguments: tab_rate : rates table from note_glm_f
1224 ## objLM : GLM assessed 1350 ## expr_lm : GLM expression assessed
1225 ## metric : selected metric 1351 ## obj_lm : GLM object
1226 ## listFact : Analysis factors list 1352 ## file_out : Output as file ? else global rate only
1227 ## ---------------------------------------------------------------------- 1353 ## ----------------------------------------------------------------------
1228 ## Author: Coline ROYAUX, 26 june 2020 1354 ## Author: Coline ROYAUX, 26 june 2020
1229 1355 namefile <- "RatingGLM.txt"
1230 RateM <- mean(na.omit(tabRate[,"rate"])) 1356
1231 sum <- summary(objLM) 1357 if (length(grep("quasi", obj_lm$family)) == 0) { #DHARMa doesn't work with quasi distributions
1232 1358
1233 if (length(grep("^glmmTMB", objLM$call)) > 0) 1359 rate_m <- median(na.omit(tab_rate[, "rate"]))
1234 { 1360 sum <- summary(obj_lm)
1235 if (median(na.omit(tabRate[,"rate"])) >= 6) # if 50% has a rate superior or equal to 6 +1 1361
1236 { 1362 if (length(grep("^glmmTMB", obj_lm$call)) > 0) {
1237 RateM <- RateM + 1 1363 if (median(na.omit(tab_rate[, "rate"])) >= 6) { # if 50% has a rate superior or equal to 6 +1
1238 } 1364 rate_m <- rate_m + 1
1239 1365 }
1240 if (quantile(na.omit(tabRate[,"rate"]), probs=0.9) >= 6) # if 90% has a rate superior or equal to 6 +1 1366
1241 { 1367 if (quantile(na.omit(tab_rate[, "rate"]), probs = 0.9) >= 6) { # if 90% has a rate superior or equal to 6 +1
1242 RateM <- RateM + 1 1368 rate_m <- rate_m + 1
1243 } 1369 }
1244 }else{ 1370 }else{
1245 if (median(na.omit(tabRate[,"rate"])) >= 5) # if 50% has a rate superior or equal to 5 +1 1371 if (median(na.omit(tab_rate[, "rate"])) >= 5) { # if 50% has a rate superior or equal to 5 +1
1246 { 1372 rate_m <- rate_m + 1
1247 RateM <- RateM + 1 1373 }
1248 } 1374
1249 1375 if (quantile(na.omit(tab_rate[, "rate"]), probs = 0.9) >= 5) { # if 90% has a rate superior or equal to 5 +1
1250 if (quantile(na.omit(tabRate[,"rate"]), probs=0.9) >= 5) # if 90% has a rate superior or equal to 5 +1 1376 rate_m <- rate_m + 1
1251 { 1377 }
1252 RateM <- RateM + 1 1378 }
1253 } 1379
1254 } 1380 if (file_out) {
1255
1256 if (file_out)
1257 {
1258 namefile <- "RatingGLM.txt"
1259 1381
1260 cat("###########################################################################", 1382 cat("###########################################################################",
1261 "\n########################### Analysis evaluation ###########################", 1383 "\n########################### Analysis evaluation ###########################",
1262 "\n###########################################################################", file=namefile, fill=1,append=TRUE) 1384 "\n###########################################################################", file = namefile, fill = 1, append = TRUE)
1263 1385
1264 ## Informations on model : 1386 ## Informations on model :
1265 cat("\n\n######################################### \nFitted model:", file=namefile, fill=1,append=TRUE) 1387 cat("\n\n######################################### \nFitted model:", file = namefile, fill = 1, append = TRUE)
1266 cat("\t", deparse(exprML), "\n\n", file=namefile, sep="",append=TRUE) 1388 cat("\t", deparse(expr_lm), "\n\n", file = namefile, sep = "", append = TRUE)
1267 cat("Family: ", sum$family[[1]], 1389 cat("Family: ", sum$family[[1]],
1268 file=namefile,append=TRUE) 1390 file = namefile, append = TRUE)
1269 cat("\n\nNumber of analysis: ", nrow(tabRate), file=namefile, append=TRUE) 1391 cat("\n\nNumber of analysis: ", nrow(tab_rate), file = namefile, append = TRUE)
1270 1392
1271 ## Global rate : 1393 ## Global rate :
1272 cat("\n\n######################################### \nGlobal rate for all analysis:", 1394 cat("\n\n######################################### \nGlobal rate for all analysis:",
1273 "\n\n", RateM, "out of 10", file=namefile, append=TRUE) 1395 "\n\n", rate_m, "out of 10", file = namefile, append = TRUE)
1274 1396
1275 ## details on every GLM : 1397 ## details on every GLM :
1276 #NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA 1398
1277 cat("\n\n######################################### \nDetails on every analysis:\n\n", file=namefile, append=TRUE) 1399 cat("\n\n######################################### \nDetails on every analysis:\n\n", file = namefile, append = TRUE)
1278 cat("Analysis\tC1\tC2\tC3\tC4\tC5\tC6\tC7\tC8\tC9\tFinal rate", file=namefile, append=TRUE) 1400 cat("Analysis\tC1\tC2\tC3\tC4\tC5\tC6\tC7\tC8\tC9\tFinal rate", file = namefile, append = TRUE)
1279 apply(tabRate, 1, FUN=function(x) 1401 apply(tab_rate, 1, FUN = function(x) {
1280 { 1402
1281 1403 if (!is.na(x["complete_plan"]) && x["complete_plan"] == TRUE) {
1282 if (!is.na(x["complete_plan"]) && x["complete_plan"]==TRUE) 1404 cat("\n", x[1], "\tyes", file = namefile, append = TRUE)
1283 {
1284 cat("\n",x[1],"\tyes", file=namefile, append=TRUE)
1285 }else{ 1405 }else{
1286 cat("\n",x[1],"\tno", file=namefile, append=TRUE) 1406 cat("\n", x[1], "\tno", file = namefile, append = TRUE)
1287 } 1407 }
1288 1408
1289 for (i in c("balanced_plan","NA_proportion_OK", "no_residual_dispersion", "uniform_residuals", "outliers_proportion_OK", "no_zero_inflation", "observation_factor_ratio_OK", "enough_levels_random_effect")) 1409 for (i in c("balanced_plan", "NA_proportion_OK", "no_residual_dispersion", "uniform_residuals", "outliers_proportion_OK", "no_zero_inflation", "observation_factor_ratio_OK", "enough_levels_random_effect")) {
1290 { 1410 if (!is.na(x[i]) && x[i] == TRUE) {
1291 if (!is.na(x[i]) && x[i]==TRUE) 1411 cat("\tyes", file = namefile, append = TRUE)
1292 {
1293 cat("\tyes", file=namefile, append=TRUE)
1294 }else{ 1412 }else{
1295 cat("\tno", file=namefile, append=TRUE) 1413 cat("\tno", file = namefile, append = TRUE)
1296 } 1414 }
1297 } 1415 }
1298 1416
1299 cat("\t",x["rate"], "/ 8", file=namefile, append=TRUE) 1417 cat("\t", x["rate"], "/ 8", file = namefile, append = TRUE)
1300 1418
1301 1419
1302 }) 1420 })
1303 cat("\n\nC1: Complete plan?\nC2: Balanced plan?\nC3: Few NA?\nC4: Regular dispersion?\nC5: Uniform residuals?\nC6: Regular outliers proportion?\nC7: No zero-inflation?\nC8: Enough observations for the amount of factors?\nC9: Enough levels on random effect?", file=namefile, append=TRUE) 1421 cat("\n\nC1: Complete plan?\nC2: Balanced plan?\nC3: Few NA?\nC4: Regular dispersion?\nC5: Uniform residuals?\nC6: Regular outliers proportion?\nC7: No zero-inflation?\nC8: Good observation/factor ratio?\nC9: Enough levels on random effect?", file = namefile, append = TRUE)
1304 1422
1305 ## Red flags - advice : 1423 ## Red flags - advice :
1306 cat("\n\n######################################### \nRed flags - advice:\n\n", file=namefile, append=TRUE) 1424 cat("\n\n######################################### \nRed flags - advice:\n\n", file = namefile, append = TRUE)
1307 if (all(na.omit(tabRate["NA_proportion_OK"]) == FALSE)) 1425 if (all(na.omit(tab_rate["NA_proportion_OK"]) == FALSE)) {
1308 { 1426 cat("\n", "\t- More than 10% of lines of your dataset contains NAs", file = namefile, append = TRUE)
1309 cat("\n","\t- More than 10% of your dataset bares NAs", file=namefile, append=TRUE) 1427 }
1310 }else{} 1428
1311 1429 if (length(grep("FALSE", tab_rate["no_residual_dispersion"])) / length(na.omit(tab_rate["no_residual_dispersion"])) > 0.5) {
1312 if (length(grep("FALSE",tabRate["no_residual_dispersion"])) / length(na.omit(tabRate["no_residual_dispersion"])) > 0.5) 1430 cat("\n", "\t- More than 50% of your analyses are over- or under- dispersed : Try with another distribution family", file = namefile, append = TRUE)
1313 { 1431 }
1314 cat("\n","\t- More than 50% of your analyses are over- or under- dispersed : Try with another distribution family", file=namefile, append=TRUE) 1432
1315 }else{} 1433 if (length(grep("FALSE", tab_rate["uniform_residuals"])) / length(na.omit(tab_rate["uniform_residuals"])) > 0.5) {
1316 1434 cat("\n", "\t- More than 50% of your analyses haven't an uniform distribution of residuals : Try with another distribution family", file = namefile, append = TRUE)
1317 if (length(grep("FALSE",tabRate["uniform_residuals"])) / length(na.omit(tabRate["uniform_residuals"])) > 0.5) 1435 }
1318 { 1436
1319 cat("\n","\t- More than 50% of your analyses haven't an uniform distribution of residuals : Try with another distribution family", file=namefile, append=TRUE) 1437 if (length(grep("FALSE", tab_rate["outliers_proportion_OK"])) / length(na.omit(tab_rate["outliers_proportion_OK"])) > 0.5) {
1320 }else{} 1438 cat("\n", "\t- More than 50% of your analyses have too much outliers : Try with another distribution family or try to select or filter your data", file = namefile, append = TRUE)
1321 1439 }
1322 if (length(grep("FALSE",tabRate["outliers_proportion_OK"])) / length(na.omit(tabRate["outliers_proportion_OK"])) > 0.5) 1440
1323 { 1441 if (length(grep("FALSE", tab_rate["no_zero_inflation"])) / length(na.omit(tab_rate["no_zero_inflation"])) > 0.5) {
1324 cat("\n","\t- More than 50% of your analyses have too much outliers : Try with another distribution family or try to select your data", file=namefile, append=TRUE) 1442 cat("\n", "\t- More than 50% of your analyses have zero inflation : Try to select or filter your data", file = namefile, append = TRUE)
1325 }else{} 1443 }
1326 1444
1327 if (length(grep("FALSE",tabRate["no_zero_inflation"])) / length(na.omit(tabRate["no_zero_inflation"])) > 0.5) 1445 if (length(grep("FALSE", tab_rate["observation_factor_ratio_OK"])) / length(na.omit(tab_rate["observation_factor_ratio_OK"])) > 0.5) {
1328 { 1446 cat("\n", "\t- More than 50% of your analyses have not enough observations for the amount of factors : Try to use less factors in your analysis or try to use another separation factor", file = namefile, append = TRUE)
1329 cat("\n","\t- More than 50% of your analyses have zero inflation : Try to select your data", file=namefile, append=TRUE) 1447 }
1330 }else{} 1448
1331 1449 if (any(tab_rate["enough_levels_random_effect"] == FALSE, na.rm = TRUE) && length(grep("^glmmTMB", obj_lm$call)) > 0) {
1332 if (length(grep("FALSE",tabRate["observation_factor_ratio_OK"])) / length(na.omit(tabRate["observation_factor_ratio_OK"])) > 0.5) 1450 cat("\n", "\t- Random effect hasn't enough levels to be robust : If it has less than ten levels remove the random effect", file = namefile, append = TRUE)
1333 { 1451 }
1334 cat("\n","\t- More than 50% of your analyses have not enough observations for the amount of factors : Try to use less factors in your analysis or try to use another separation factor", file=namefile, append=TRUE) 1452 }else{
1335 }else{} 1453
1336 1454 return(rate_m)
1337 if (any(tabRate["enough_levels_random_effect"] == FALSE, na.rm=TRUE) && length(grep("^glmmTMB", objLM$call)) > 0) 1455
1338 { 1456 }
1339 cat("\n","\t- Random effect hasn't enough levels to be robust : If it has less than ten levels remove the random effect", file=namefile, append=TRUE) 1457 }else{
1340 }else{} 1458 cat("Models with quasi distributions can't be rated for now", file = namefile, append = TRUE)
1341 }else{ 1459 }
1342 1460 }
1343 return(RateM) 1461
1344 1462 ######################################### end of the function note_glm_f
1345 } 1463
1346 } 1464 ######################################### start of the function info_stats_f called by glm_species and glm_community
1347 1465
1348 ######################################### end of the function noteGLM.f 1466 info_stats_f <- function(filename, d_ata, agreg_level = c("species", "unitobs"), type = c("graph", "stat"),
1349 1467 metrique, fact_graph, fact_graph_sel, list_fact, list_fact_sel) {
1350 ######################################### start of the function infoStats.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f 1468 ## Purpose: informations and simple statistics
1351 1469 ## ----------------------------------------------------------------------
1352 infoStats.f <- function(filename, Data, agregLevel=c("species", "unitobs"), type=c("graph", "stat"), 1470 ## Arguments: filename : name of file
1353 metrique, factGraph, factGraphSel, listFact, listFactSel) 1471 ## d_ata : input data
1354 { 1472 ## agreg_level : aggregation level
1355 ## Purpose: Écrire les infos et statistic sur les données associées à 1473 ## type : type of function calling
1356 ## un graphique ou analyse. 1474 ## metrique : selected metric
1357 ## ---------------------------------------------------------------------- 1475 ## fact_graph : selection factor
1358 ## Arguments: filename : chemin du fichier de résultats. 1476 ## fact_graph_sel : list of factors levels selected for this factor
1359 ## Data : données du graphique/de l'analyse. 1477 ## list_fact : list of grouping factors
1360 ## agregLevel : niveau d'agrégation de la fonction appelante. 1478 ## list_fact_sel : list of factors levels selected for these factors
1361 ## type : type de fonction appelante (grapique ou analyse).
1362 ## metrique : la métrique choisie.
1363 ## factGraph : le facteur sélection des espèces.
1364 ## factGraphSel : la sélection de modalités pour ce dernier
1365 ## listFact : liste du (des) facteur(s) de regroupement
1366 ## listFactSel : liste des modalités sélectionnées pour ce(s)
1367 ## dernier(s)
1368 ## ---------------------------------------------------------------------- 1479 ## ----------------------------------------------------------------------
1369 ## Author: Yves Reecht, Date: 10 sept. 2012, 15:26 modified by Coline ROYAUX 04 june 2020 1480 ## Author: Yves Reecht, Date: 10 sept. 2012, 15:26 modified by Coline ROYAUX 04 june 2020
1370 1481
1371 ## Open file : 1482 ## Open file :
1372 File <- file(description=filename, 1483 f_ile <- file(description = filename,
1373 open="w", encoding="latin1") 1484 open = "w", encoding = "latin1")
1374 1485
1375 ## if error : 1486 ## if error :
1376 on.exit(if (exists("filename") && 1487 on.exit(if (exists("filename") &&
1377 tryCatch(isOpen(File), 1488 tryCatch(isOpen(f_ile),
1378 error=function(e)return(FALSE))) close(File)) 1489 error = function(e)return(FALSE))) close(f_ile))
1379 1490
1380 ## Metrics and factors infos : 1491 ## Metrics and factors infos :
1381 printSelectionInfo.f(metrique=metrique, #factGraph=factGraph, factGraphSel=factGraphSel, 1492 print_selection_info_f(metrique = metrique, #fact_graph = fact_graph, fact_graph_sel = fact_graph_sel,
1382 listFact=listFact, #listFactSel=listFactSel, 1493 list_fact = list_fact, #list_fact_sel = list_fact_sel,
1383 File=File, 1494 f_ile = f_ile,
1384 agregLevel=agregLevel, type=type) 1495 agreg_level = agreg_level, type = type)
1385 1496
1386 ## statistics : 1497 ## statistics :
1387 if (class(Data) == "list") 1498 if (class(d_ata) == "list") {
1388 {
1389 cat("\n###################################################", 1499 cat("\n###################################################",
1390 "\nStatistics per level of splitting factor:\n", 1500 "\nStatistics per level of splitting factor:\n",
1391 sep="", file=File,append=TRUE) 1501 sep = "", file = f_ile, append = TRUE)
1392 1502
1393 invisible(sapply(1:length(Data), 1503 invisible(sapply(seq_len(length(d_ata)),
1394 function(i) 1504 function(i) {
1395 { 1505 print_stats_f(d_ata = d_ata[[i]], metrique = metrique, list_fact = list_fact, f_ile = f_ile,
1396 printStats.f(Data=Data[[i]], metrique=metrique, listFact=listFact, File=File, 1506 headline = fact_graph_sel[i])
1397 headline=factGraphSel[i])
1398 })) 1507 }))
1399 }else{ 1508 }else{
1400 printStats.f(Data=Data, metrique=metrique, listFact=listFact, File=File, 1509 print_stats_f(d_ata = d_ata, metrique = metrique, list_fact = list_fact, f_ile = f_ile,
1401 headline=NULL) 1510 headline = NULL)
1402 } 1511 }
1403 1512
1404 ## Close file : 1513 ## Close file :
1405 close(File) 1514 close(f_ile)
1406 1515
1407 } 1516 }
1408 1517
1409 ######################################### end of the function infoStats.f 1518 ######################################### end of the function info_stats_f
1410 1519
1411 1520
1412 ######################################### start of the function printSelectionInfo.f called by infoStats.f 1521 ######################################### start of the function print_selection_info_f called by info_stats_f
1413 1522
1414 printSelectionInfo.f <- function(metrique, listFact, 1523 print_selection_info_f <- function(metrique, list_fact,
1415 File, 1524 f_ile,
1416 agregLevel=c("species", "unitobs"), type=c("graph", "stat")) 1525 agreg_level = c("species", "unitobs"), type = c("graph", "stat")) {
1417 {
1418 ## Purpose: Write data informations 1526 ## Purpose: Write data informations
1419 ## ---------------------------------------------------------------------- 1527 ## ----------------------------------------------------------------------
1420 ## Arguments: metrique : chosen metric 1528 ## Arguments: metrique : chosen metric
1421 ## listFact : factor's list 1529 ## list_fact : factor's list
1422 ## File : Results file name 1530 ## f_ile : Results file name
1423 ## agregLevel : aggregation level 1531 ## agreg_level : aggregation level
1424 ## type : function type 1532 ## type : function type
1425 ## ---------------------------------------------------------------------- 1533 ## ----------------------------------------------------------------------
1426 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:41 modified by Coline ROYAUX 04 june 2020 1534 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:41 modified by Coline ROYAUX 04 june 2020
1427 1535
1428 cat("\n##################################################\n", 1536 cat("\n##################################################\n",
1429 "Metrics and factors (and possible units/selections):\n", 1537 "Metrics and factors (and possible units/selections):\n",
1430 sep="", file=File,append=TRUE) 1538 sep = "", file = f_ile, append = TRUE)
1431 1539
1432 ## metric info : 1540 ## metric info :
1433 cat("\n Metrics:", metrique, 1541 cat("\n Metrics:", metrique,
1434 "\n", file=File,append=TRUE) 1542 "\n", file = f_ile, append = TRUE)
1435
1436 ## aggregation level :
1437 cat(" aggregated per ",
1438 switch(agregLevel,
1439 "CL_espece"=,"CL_unitobs"=,"spCL_unitobs"=,"spCL_espece"={
1440 "size class / "
1441 }),
1442 switch(agregLevel,
1443 "CL_espece"=,"spCL_espece"=,"species"=,"spSpecies"=,"spEspece"={
1444 "species / "
1445 }),
1446 switch(agregLevel,
1447 "spUnitobs"=,"spCL_unitobs"=,"spCL_espece"=,"spUnitobs(CL)"=,"spSpecies"=,"spEspece"={
1448 paste(listFact, " (mean over ", sep="")
1449 }),
1450 "observation units",
1451 switch(agregLevel,
1452 "spUnitobs"=,"spCL_unitobs"=,"spCL_espece"=,"spUnitobs(CL)"=,"spSpecies"=,"spEspece"={
1453 ")"
1454 }),
1455 ".\n",
1456 sep="", file=File,append=TRUE)
1457
1458 ## Separation factors :
1459 # switch(agregLevel,
1460 # "species"=,"CL_espece"=,"espece"={ # Adapté également pour les LMs.
1461 # cat("\n",
1462 # switch(type,
1463 # "graph"="Graphics separation factor",
1464 # "stat"="Analyses separation factor"),
1465 # " : ",
1466 # ifelse(factGraph == "", "printSelectionInfo.f.11",
1467 # ifelse(is.na(factGraphSel[1]),
1468 # paste(varNames.f(factGraph, "nom"), "none!"),
1469 # paste(varNames.f(factGraph, "nom"), " (",
1470 # paste(factGraphSel, collapse=", "), ")", sep=""))), "\n",
1471 # sep="", file=File,append=TRUE)
1472 # },
1473 # "unitobs"=,"CL_unitobs"=,"unitobs(CL)"=,"spUnitobs"={
1474 # cat("(warning: no selection!!!)",
1475 # ifelse(factGraph == "", "\nSelection factor for aggregation of observations: ",
1476 # ifelse(is.na(factGraphSel[1]),
1477 # paste(varNames.f(factGraph, "nom"), "none (all species/size classes)!"),
1478 # paste(varNames.f(factGraph, "nom"), " (",
1479 # paste(factGraphSel, collapse=", "), ")", sep=""))), "\n",
1480 # sep="", file=File,append=TRUE)
1481 # })
1482 1543
1483 ## Clustering factors : 1544 ## Clustering factors :
1484 if (is.element(agregLevel, c("spCL_unitobs", "spCL_espece", "spSpecies", "spEspece", 1545 if (is.element(agreg_level, c("spCL_unitobs", "spCL_espece", "spSpecies", "spEspece",
1485 "spUnitobs", "spUnitobs(CL)"))) {type <- "spatialGraph"} 1546 "spUnitobs", "spUnitobs(CL)"))) {
1547 type <- "spatialGraph"
1548 }
1486 1549
1487 cat(switch(type, 1550 cat(switch(type,
1488 "graph"="\nGrouping factor(s): \n * ", 1551 "graph" = "\nGrouping factor(s): \n * ",
1489 "stat"="\nAnalyses factor(s): \n * ", 1552 "stat" = "\nAnalyses factor(s): \n * ",
1490 "spatialGraph"="\nSpatial aggregation factor(s): \n * "), 1553 "spatialGraph" = "\nSpatial aggregation factor(s): \n * "),
1491 paste(listFact,collaspe="\n * "),"\n",file=File,append=TRUE) 1554 paste(list_fact, collaspe = "\n * "), "\n", file = f_ile, append = TRUE)
1492 1555
1493 # invisible(sapply(1:length(listFact), 1556 }
1494 # function(i) 1557
1495 # { 1558 ######################################### end of the function print_selection_info_f
1496 # cat("\n * ", 1559
1497 # ifelse(is.na(listFactSel[[i]][1]), 1560
1498 # paste(varNames.f(listFact[i], "nom"), "(no selection)"), 1561 ######################################### start of the function print_stats_f called by info_stats_f
1499 # paste(varNames.f(listFact[i], "nom"), " (", 1562
1500 # paste(listFactSel[[i]], collapse=", "), ")", sep="")), "\n", 1563 print_stats_f <- function(d_ata, metrique, list_fact, f_ile, headline = NULL) {
1501 # sep="", file=File,append=TRUE)
1502 # }))
1503 }
1504
1505 ######################################### end of the function printSelectionInfo.f
1506
1507
1508 ######################################### start of the function printStats.f called by infoStats.f
1509
1510 printStats.f <- function(Data, metrique, listFact, File, headline=NULL)
1511 {
1512 ## Purpose: Write general statistics table 1564 ## Purpose: Write general statistics table
1513 ## ---------------------------------------------------------------------- 1565 ## ----------------------------------------------------------------------
1514 ## Arguments: Data : Analysis data 1566 ## Arguments: d_ata : Analysis data
1515 ## metrique : metric's name 1567 ## metrique : metric's name
1516 ## listFact : Factor's list 1568 ## list_fact : Factor's list
1517 ## File : Simple statistics file name 1569 ## f_ile : Simple statistics file name
1518 ## ---------------------------------------------------------------------- 1570 ## ----------------------------------------------------------------------
1519 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:09 modified by Coline ROYAUX 04 june 2020 1571 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:09 modified by Coline ROYAUX 04 june 2020
1520 1572
1521 ## Header : 1573 ## Header :
1522 if ( ! is.null(headline)) 1574 if (! is.null(headline)) {
1523 {
1524 cat("\n", rep("#", nchar(headline) + 3), "\n", 1575 cat("\n", rep("#", nchar(headline) + 3), "\n",
1525 "## ", headline, "\n", 1576 "## ", headline, "\n",
1526 sep="", file=File,append=TRUE) 1577 sep = "", file = f_ile, append = TRUE)
1527 }else{} 1578 }
1528 1579
1529 cat("\n########################\nBase statistics:\n\n", file=File,append=TRUE) 1580 cat("\n########################\nBase statistics:\n\n", file = f_ile, append = TRUE)
1530 1581
1531 capture.output(print(summary.fr(Data[ , metrique])), file=File, append=TRUE) 1582 capture.output(print(summary_fr(d_ata[, metrique])), file = f_ile, append = TRUE)
1532 1583
1533 if ( ! is.null(listFact)) 1584 if (! is.null(list_fact)) {
1534 {
1535 cat("\n#########################################", 1585 cat("\n#########################################",
1536 "\nStatistics per combination of factor levels:\n\n", file=File, sep="",append=TRUE) 1586 "\nStatistics per combination of factor levels:\n\n", file = f_ile, sep = "", append = TRUE)
1537 1587
1538 ## Compute summary for each existing factor's cross : 1588 ## Compute summary for each existing factor's cross :
1539 res <- with(Data, 1589 res <- with(d_ata,
1540 tapply(eval(parse(text=metrique)), 1590 tapply(eval(parse(text = metrique)),
1541 INDEX=do.call(paste, 1591 INDEX = do.call(paste,
1542 c(lapply(listFact, 1592 c(lapply(list_fact,
1543 function(y)eval(parse(text=y))), 1593 function(y)eval(parse(text = y))),
1544 sep=".")), 1594 sep = ".")),
1545 FUN=summary.fr)) 1595 FUN = summary_fr))
1546 1596
1547 ## results in table 1597 ## results in table
1548 capture.output(print(do.call(rbind, res)), 1598 capture.output(print(do.call(rbind, res)),
1549 file=File, append=TRUE) 1599 file = f_ile, append = TRUE)
1550 }else{} 1600 }
1551 1601
1552 ## empty line : 1602 ## empty line :
1553 cat("\n", file=File,append=TRUE) 1603 cat("\n", file = f_ile, append = TRUE)
1554 } 1604 }
1555 1605
1556 ######################################### end of the function printStats.f 1606 ######################################### end of the function print_stats_f
1557 1607
1558 1608
1559 ######################################### start of the function summary.fr called by printStats.f 1609 ######################################### start of the function summary_fr called by print_stats_f
1560 summary.fr <- function(object, digits = max(3, getOption("digits") - 3),...) 1610 summary_fr <- function(object, digits = max(3, getOption("digits") - 3), ...) {
1561 {
1562 ## Purpose: Adding SD and N to summary 1611 ## Purpose: Adding SD and N to summary
1563 ## ---------------------------------------------------------------------- 1612 ## ----------------------------------------------------------------------
1564 ## Arguments: object : Object to summarise 1613 ## Arguments: object : Object to summarise
1565 ## ---------------------------------------------------------------------- 1614 ## ----------------------------------------------------------------------
1566 ## Author: Yves Reecht, Date: 13 sept. 2012, 15:47 modified by Coline ROYAUX 04 june 2020 1615 ## Author: Yves Reecht, Date: 13 sept. 2012, 15:47 modified by Coline ROYAUX 04 june 2020
1567 1616
1568 if ( ! is.numeric(object)) stop("Programming error") 1617 if (! is.numeric(object)) stop("Programming error")
1569 1618
1570 ## Compute summary : 1619 ## Compute summary :
1571 res <- c(summary(object=object, digits, ...), "sd"=signif(sd(x=object), digits=digits), "N"=length(object)) 1620 res <- c(summary(object = object, digits, ...), "sd" = signif(sd(x = object), digits = digits), "N" = length(object))
1572 1621
1573 return(res) 1622 return(res)
1574 } 1623 }
1575 1624
1576 ######################################### start of the function summary.fr 1625 ######################################### start of the function summary_fr
1577
1578 ######################################### Package DHARMa
1579
1580 ################ simulateResiduals.R
1581
1582 #' Create simulated residuals
1583 #'
1584 #' The function creates scaled residuals by simulating from the fitted model. Residuals can be extracted with \code{\link{residuals.DHARMa}}. See \code{\link{testResiduals}} for an overview of residual tests, \code{\link{plot.DHARMa}} for an overview of available plots.
1585 #'
1586 #' @param fittedModel a fitted model of a class supported by DHARMa
1587 #' @param n number of simulations. Default is 100. A more save value would be 250 or even 1000. The smaller the number, the higher the stochastic error on the residuals. Also, for very small n, discretization artefacts can influence the tests.
1588 #' @param refit if FALSE, new data will be simulated and scaled residuals will be created by comparing observed data with new data. If TRUE, the model will be refit on the simulated data (parametric bootstrap), and scaled residuals will be created by comparing observed with refitted residuals.
1589 #' @param integerResponse if TRUE, noise will be added at to the residuals to maintain a uniform expectations for integer responses (such as Poisson or Binomial). Usually, the model will automatically detect the appropriate setting, so there is no need to adjust this setting.
1590 #' @param plot if TRUE, \code{\link{plotResiduals}} will be directly run after the residuals have been calculated
1591 #' @param ... parameters to pass to the simulate function of the model object. An important use of this is to specify whether simulations should be conditional on the current random effect estimates, e.g. via re.form. Note that not all models support syntax to specify conditionao or unconditional simulations. See also details
1592 #' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus teh same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details.
1593 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}}
1594 #' @return An S3 class of type "DHARMa", essentially a list with various elements. Implemented S3 functions include plot, print and \code{\link{residuals.DHARMa}}. Residuals returns the calculated scaled residuals.
1595 #'
1596 #' @details There are a number of important considerations when simulating from a more complex (hierarchical) model:
1597 #'
1598 #' \strong{Re-simulating random effects / hierarchical structure}: in a hierarchical model, we have several stochastic processes aligned on top of each other. Specifically, in a GLMM, we have a lower level stochastic process (random effect), whose result enters into a higher level (e.g. Poisson distribution). For other hierarchical models such as state-space models, similar considerations apply.
1599 #'
1600 #' In such a situation, we have to decide if we want to re-simulate all stochastic levels, or only a subset of those. For example, in a GLMM, it is common to only simulate the last stochastic level (e.g. Poisson) conditional on the fitted random effects. This is often referred to as a conditional simuation. For controlling how many levels should be re-simulated, the simulateResidual function allows to pass on parameters to the simulate function of the fitted model object. Please refer to the help of the different simulate functions (e.g. ?simulate.merMod) for details. For merMod (lme4) model objects, the relevant parameters are parameters are use.u and re.form
1601 #'
1602 #' If the model is correctly specified, the simulated residuals should be flat regardless how many hierarchical levels we re-simulate. The most thorough procedure would therefore be to test all possible options. If testing only one option, I would recommend to re-simulate all levels, because this essentially tests the model structure as a whole. This is the default setting in the DHARMa package. A potential drawback is that re-simulating the lower-level random effects creates more variability, which may reduce power for detecting problems in the upper-level stochastic processes. In particular dispersion tests may produce different results when switching from conditional to unconditional simulations, and often the conditional simulation is more sensitive.
1603 #'
1604 #' \strong{Integer responses}: a second complication is the treatment of inter responses. Imaging we have observed a 0, and we predict 30\% zeros - what is the quantile that we should display for the residual? To deal with this problem and maintain a uniform response, the option integerResponse adds a uniform noise from -0.5 to 0.5 on the simulated and observed response, which creates a uniform distribution - you can see this via hist(ecdf(runif(10000))(runif(10000))).
1605 #'
1606 #' DHARMa will try to automatically if the fitted model has an integer or discrete distribution via the family argument. However, in some cases the family does not allow to uniquely identify the distribution type. For example, a tweedie distribution can be inter or continuous. Therefore, DHARMa will additionally check the simulation results for repeated values, and will change the distribution type if repeated values are found (a message is displayed in this case).
1607 #'
1608 #' \strong{Refitting or not}: a third issue is how residuals are calculated. simulateResiduals has two options that are controlled by the refit parameter:
1609 #'
1610 #' 1. if refit = FALSE (default), new data is simulated from the fitted model, and residuals are calculated by comparing the observed data to the new data
1611 #'
1612 #' 2. if refit = TRUE, a parametric bootstrap is performed, meaning that the model is refit on the new data, and residuals are created by comparing observed residuals against refitted residuals. I advise against using this method per default (see more comments in the vignette), unless you are really sure that you need it.
1613 #'
1614 #' \strong{Residuals per group}: In many situations, it can be useful to look at residuals per group, e.g. to see how much the model over / underpredicts per plot, year or subject. To do this, use \code{\link{recalculateResiduals}}, together with a grouping variable (see also help)
1615 #'
1616 #' \strong{Transformation to other distributions}: DHARMa calculates residuals for which the theoretical expectation (assuming a correctly specified model) is uniform. To transfor this residuals to another distribution (e.g. so that a correctly specified model will have normal residuals) see \code{\link{residuals.DHARMa}}.
1617 #'
1618 #' @seealso \code{\link{testResiduals}}, \code{\link{plot.DHARMa}}, \code{\link{plotResiduals}}, \code{\link{print.DHARMa}}, \code{\link{residuals.DHARMa}}, \code{\link{recalculateResiduals}}
1619 #'
1620 #'
1621 #' @example inst/examples/simulateResidualsHelp.R
1622 #' @import stats
1623 #' @export
1624 simulateResiduals <- function(fittedModel, n = 250, refit = F, integerResponse = NULL, plot = F, seed = 123, method = c("PIT", "traditional"), ...){
1625
1626 ######## general assertions and startup calculations ##########
1627
1628 if (n < 2) stop("error in DHARMa::simulateResiduals: n > 1 is required to calculate scaled residuals")
1629 checkModel(fittedModel)
1630 match.arg(method)
1631 randomState <-getRandomState(seed)
1632 on.exit({randomState$restoreCurrent()})
1633 ptm <- proc.time()
1634
1635 ####### extract model info ############
1636
1637 out = list()
1638
1639 family = family(fittedModel)
1640 out$fittedModel = fittedModel
1641 out$modelClass = class(fittedModel)[1]
1642
1643 out$nObs = nobs(fittedModel)
1644 out$nSim = n
1645 out$refit = refit
1646 out$observedResponse = getObservedResponse(fittedModel)
1647
1648 if(is.null(integerResponse)){
1649 if (family$family %in% c("binomial", "poisson", "quasibinomial", "quasipoisson", "Negative Binom", "nbinom2", "nbinom1", "genpois", "compois", "truncated_poisson", "truncated_nbinom2", "truncated_nbinom1", "betabinomial", "Poisson", "Tpoisson", "COMPoisson", "negbin", "Tnegbin") | grepl("Negative Binomial",family$family) ) integerResponse = TRUE
1650 else integerResponse = FALSE
1651 }
1652 out$integerResponse = integerResponse
1653
1654 out$problems = list()
1655
1656 # re-form should be set to ~0 to avoid spurious residual patterns, see https://github.com/florianhartig/DHARMa/issues/43
1657
1658 if(out$modelClass %in% c("HLfit")){
1659 out$fittedPredictedResponse = predict(fittedModel, type = "response", re.form = ~0)[,1L]
1660 }else{
1661 out$fittedPredictedResponse = predict(fittedModel, type = "response", re.form = ~0)
1662 }
1663
1664 out$fittedFixedEffects = getFixedEffects(fittedModel)
1665 out$fittedResiduals = residuals(fittedModel, type = "response")
1666
1667 ######## refit = F ##################
1668
1669 if (refit == FALSE){
1670
1671 out$simulatedResponse = getSimulations(fittedModel, nsim = n, type = "normal", ...)
1672
1673 checkSimulations(out$simulatedResponse, out$nObs, out$nSim)
1674
1675 out$scaledResiduals = getQuantile(simulations = out$simulatedResponse , observed = out$observedResponse , integerResponse = integerResponse, method = method)
1676
1677 ######## refit = T ##################
1678 } else {
1679
1680 # Adding new outputs
1681
1682 out$refittedPredictedResponse <- matrix(nrow = out$nObs, ncol = n )
1683 out$refittedFixedEffects <- matrix(nrow = length(out$fittedFixedEffects), ncol = n )
1684 #out$refittedRandomEffects <- matrix(nrow = length(out$fittedRandomEffects), ncol = n )
1685 out$refittedResiduals = matrix(nrow = out$nObs, ncol = n)
1686 out$refittedPearsonResiduals = matrix(nrow = out$nObs, ncol = n)
1687
1688 out$simulatedResponse = getSimulations(fittedModel, nsim = n, type = "refit", ...)
1689
1690 for (i in 1:n){
1691
1692 simObserved = out$simulatedResponse[[i]]
1693
1694 try({
1695
1696 # for testing
1697 # if (i==3) stop("x")
1698 # Note: also set silent = T for production
1699
1700 refittedModel = getRefit(fittedModel, simObserved)
1701
1702 out$refittedPredictedResponse[,i] = predict(refittedModel, type = "response")
1703 out$refittedFixedEffects[,i] = getFixedEffects(refittedModel)
1704 out$refittedResiduals[,i] = residuals(refittedModel, type = "response")
1705 out$refittedPearsonResiduals[,i] = residuals(refittedModel, type = "pearson")
1706 #out$refittedRandomEffects[,i] = ranef(refittedModel)
1707 }, silent = TRUE)
1708 }
1709
1710 ######### residual checks ###########
1711
1712 if(anyNA(out$refittedResiduals)) warning("DHARMa::simulateResiduals warning: on refit = TRUE, at least one of the refitted models produced an error. Inspect the refitted model values. Results may not be reliable.")
1713
1714 ## check for convergence problems
1715
1716 dup = sum(duplicated(out$refittedFixedEffects, MARGIN = 2))
1717 if (dup > 0){
1718 if (dup < n/3){
1719 warning(paste("There were", dup, "of", n ,"duplicate parameter estimates in the refitted models. This may hint towards a problem with optimizer convergence in the fitted models. Results may not be reliable. The suggested action is to not use the refitting procedure, and diagnose with tools available for the normal (not refitted) simulated residuals. If you absolutely require the refitting procedure, try changing tolerance / iterations in the optimizer settings."))
1720 } else {
1721 warning(paste("There were", dup, "of", n ,"duplicate parameter estimates in the refitted models. This may hint towards a problem with optimizer convergence in the fitted models. Results are likely not reliable. The suggested action is to not use the refitting procedure, and diagnose with tools available for the normal (not refitted) simulated residuals. If you absolutely require the refitting procedure, try changing tolerance / iterations in the optimizer settings."))
1722 out$problems[[length(out$problems)+ 1]] = "error in refit"
1723 }
1724 }
1725
1726 ######### residual calculations ###########
1727
1728 out$scaledResiduals = getQuantile(simulations = out$refittedResiduals, observed = out$fittedResiduals, integerResponse = integerResponse, method = method)
1729 }
1730
1731 ########### Wrapup ############
1732
1733 out$time = proc.time() - ptm
1734 out$randomState = randomState
1735
1736 class(out) = "DHARMa"
1737
1738 if(plot == TRUE) plot(out)
1739
1740 return(out)
1741 }
1742
1743 getPossibleModels<-function()c("lm", "glm", "negbin", "lmerMod", "glmerMod", "gam", "bam", "glmmTMB", "HLfit")
1744
1745
1746
1747 #' Check if the fitted model is supported by DHARMa
1748 #'
1749 #' The function checks if the fitted model is supported by DHARMa, and if there are other issues that could create problems
1750 #'
1751 #' @param fittedModel a fitted model
1752 #' @param stop whether to throw an error if the model is not supported by DHARMa
1753 #'
1754 #' @details The main purpose of this function os to check if the fitted model class is supported by DHARMa. The function additionally checks for properties of the fitted model that could create problems for calculating residuals or working with the resuls in DHARMa.
1755 #'
1756 #'
1757 #' @keywords internal
1758 checkModel <- function(fittedModel, stop = F){
1759
1760 out = T
1761
1762 if(!(class(fittedModel)[1] %in% getPossibleModels())){
1763 if(stop == FALSE) warning("DHARMa: fittedModel not in class of supported models. Absolutely no guarantee that this will work!")
1764 else stop("DHARMa: fittedModel not in class of supported models")
1765 }
1766
1767 # if(hasNA(fittedModel)) message("It seems there were NA values in the data used for fitting the model. This can create problems if you supply additional data to DHARMa functions. See ?checkModel for details")
1768
1769 # TODO: check as implemented does not work reliably, check if there is any other option to check for NA
1770 # #' @example inst/examples/checkModelHelp.R
1771
1772 # NA values in the data: checkModel will detect if there were NA values in the data frame. For NA values, most regression models will remove the entire observation from the data. This is not a problem for DHARMa - residuals are then only calculated for non-NA rows in the data. However, if you provide additional predictors to DHARMa, for example to plot residuals against a predictor, you will have to remove all NA rows that were also removed in the model. For most models, you can get the rows of the data that were actually used in the fit via rownames(model.frame(fittedModel))
1773
1774
1775 if (class(fittedModel)[1] == "gam" ) if (class(fittedModel$family)[1] == "extended.family") stop("It seems you are trying to fit a model from mgcv that was fit with an extended.family. Simulation functions for these families are not yet implemented in DHARMa. See issue https://github.com/florianhartig/DHARMa/issues/11 for updates about this")
1776
1777 }
1778
1779
1780
1781 #' Check simulated data
1782 #'
1783 #' The function checks if the simulated data seems fine
1784 #'
1785 #' @param simulatedResponse the simulated response
1786 #' @param nObs number of observations
1787 #' @param nSim number of simulations
1788 #'
1789 #' @keywords internal
1790 checkSimulations <- function(simulatedResponse, nObs, nSim){
1791
1792 if(!inherits(simulatedResponse, "matrix")) securityAssertion("Simulation from the model produced wrong class", stop = T)
1793
1794 if(any(dim(simulatedResponse) != c(nObs, nSim) )) securityAssertion("Simulation from the model produced wrong dimension", stop = T)
1795
1796 if(any(!is.finite(simulatedResponse))) message("Simulations from your fitted model produce infinite values. Consider if this is sensible")
1797
1798 if(any(is.nan(simulatedResponse))) securityAssertion("Simulations from your fitted model produce NaN values. DHARMa cannot calculated residuals for this. This is nearly certainly an error of the regression package you are using", stop = T)
1799 if(any(is.na(simulatedResponse))) securityAssertion("Simulations from your fitted model produce NA values. DHARMa cannot calculated residuals for this. This is nearly certainly an error of the regression package you are using", stop = T)
1800
1801 }
1802
1803
1804
1805
1806 #' Recalculate residuals with grouping
1807 #'
1808 #' The purpose of this function is to recalculate scaled residuals per group, based on the simulations done by \code{\link{simulateResiduals}}
1809 #'
1810 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}}
1811 #' @param group group of each data point
1812 #' @param aggregateBy function for the aggregation. Default is sum. This should only be changed if you know what you are doing. Note in particular that the expected residual distribution might not be flat any more if you choose general functions, such as sd etc.
1813 #' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus teh same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details.
1814 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}}
1815 #' @return an object of class DHARMa, similar to what is returned by \code{\link{simulateResiduals}}, but with additional outputs for the new grouped calculations. Note that the relevant outputs are 2x in the object, the first is the grouped calculations (which is returned by $name access), and later another time, under identical name, the original output. Moreover, there is a function 'aggregateByGroup', which can be used to aggregate predictor variables in the same way as the variables calculated here
1816 #'
1817 #' @example inst/examples/simulateResidualsHelp.R
1818 #' @export
1819 recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = sum, seed = 123, method = c("PIT", "traditional")){
1820
1821 randomState <-getRandomState(seed)
1822 on.exit({randomState$restoreCurrent()})
1823 match.arg(method)
1824
1825 if(!is.null(simulationOutput$original)) simulationOutput = simulationOutput$original
1826
1827 out = list()
1828 out$original = simulationOutput
1829
1830 if(is.null(group)) return(simulationOutput)
1831 else group =as.factor(group)
1832 out$nGroups = nlevels(group)
1833
1834 aggregateByGroup <- function(x) aggregate(x, by=list(group), FUN=aggregateBy)[,2]
1835
1836 out$observedResponse = aggregateByGroup(simulationOutput$observedResponse)
1837 out$fittedPredictedResponse = aggregateByGroup(simulationOutput$fittedPredictedResponse)
1838
1839 if (simulationOutput$refit == F){
1840
1841 out$simulatedResponse = apply(simulationOutput$simulatedResponse, 2, aggregateByGroup)
1842 out$scaledResiduals = getQuantile(simulations = out$simulatedResponse , observed = out$observedResponse , integerResponse = simulationOutput$integerResponse, method = method)
1843
1844 ######## refit = T ##################
1845 } else {
1846
1847 out$refittedPredictedResponse <- apply(simulationOutput$refittedPredictedResponse, 2, aggregateByGroup)
1848 out$fittedResiduals = aggregateByGroup(simulationOutput$fittedResiduals)
1849 out$refittedResiduals = apply(simulationOutput$refittedResiduals, 2, aggregateByGroup)
1850 out$refittedPearsonResiduals = apply(simulationOutput$refittedPearsonResiduals, 2, aggregateByGroup)
1851
1852 out$scaledResiduals = getQuantile(simulations = out$refittedResiduals , observed = out$fittedResiduals , integerResponse = simulationOutput$integerResponse, method = method)
1853
1854 }
1855
1856 # hack - the c here will result in both old and new outputs to be present resulting output, but a named access should refer to the new, grouped calculations
1857 # question to myself - what's the use of that, why not erase the old outputs? they are anyway saved in the old object
1858
1859 out$aggregateByGroup = aggregateByGroup
1860 out = c(out, simulationOutput)
1861 out$randomState = randomState
1862 class(out) = "DHARMa"
1863 return(out)
1864 }
1865
1866 ################ simulateResiduals.R
1867
1868 ################ DHARMa.R
1869
1870 #' @title DHARMa - Residual Diagnostics for HierArchical (Multi-level / Mixed) Regression Models
1871 #' @name DHARMa
1872 #' @docType package
1873 #' @description The 'DHARMa' package uses a simulation-based approach to create readily interpretable scaled (quantile) residuals for fitted (generalized) linear mixed models. Currently supported are linear and generalized linear (mixed) models from 'lme4' (classes 'lmerMod', 'glmerMod'), 'glmmTMB' and 'spaMM', generalized additive models ('gam' from 'mgcv'), 'glm' (including 'negbin' from 'MASS', but excluding quasi-distributions) and 'lm' model classes. Moreover, externally created simulations, e.g. posterior predictive simulations from Bayesian software such as 'JAGS', 'STAN', or 'BUGS' can be processed as well. The resulting residuals are standardized to values between 0 and 1 and can be interpreted as intuitively as residuals from a linear regression. The package also provides a number of plot and test functions for typical model misspecification problems, such as over/underdispersion, zero-inflation, and residual spatial and temporal autocorrelation.
1874 #' @details See index / vignette for details
1875 #' @seealso \code{\link{simulateResiduals}}
1876 #' @examples
1877 #' vignette("DHARMa", package="DHARMa")
1878 NULL
1879
1880
1881 #' Print simulated residuals
1882 #'
1883 #' @param x an object with simulated residuals created by \code{\link{simulateResiduals}}
1884 #' @param ... optional arguments for compatibility with the generic function, no function implemented
1885 #' @export
1886 print.DHARMa <- function(x, ...){
1887 cat(paste("Object of Class DHARMa with simulated residuals based on", x$nSim, "simulations with refit =", x$refit , ". See ?DHARMa::simulateResiduals for help."), "\n", "\n")
1888 if (length(x$scaledResiduals) < 20) cat("Scaled residual values:", x$scaledResiduals)
1889 else {
1890 cat("Scaled residual values:", x$scaledResiduals[1:20], "...")
1891 }
1892 }
1893
1894 #' Return residuals of a DHARMa simulation
1895 #'
1896 #' @param object an object with simulated residuals created by \code{\link{simulateResiduals}}
1897 #' @param quantileFunction optional - a quantile function to transform the uniform 0/1 scaling of DHARMa to another distribution
1898 #' @param outlierValues if a quantile function with infinite support (such as dnorm) is used, residuals that are 0/1 are mapped to -Inf / Inf. outlierValues allows to convert -Inf / Inf values to an optional min / max value.
1899 #' @param ... optional arguments for compatibility with the generic function, no function implemented
1900 #' @details the function accesses the slot $scaledResiduals in a fitted DHARMa object, and optionally transforms the standard DHARMa quantile residuals (which have a uniform distribution) to a particular pdf.
1901 #'
1902 #' @note some of the papers on simulated quantile residuals transforming the residuals (which are natively uniform) back to a normal distribution. I presume this is because of the larger familiarity of most users with normal residuals. Personally, I never considered this desirable, for the reasons explained in https://github.com/florianhartig/DHARMa/issues/39, but with this function, I wanted to give users the option to plot normal residuals if they so wish.
1903 #'
1904 #' @export
1905 #' @example inst/examples/simulateResidualsHelp.R
1906 #'
1907 residuals.DHARMa <- function(object, quantileFunction = NULL, outlierValues = NULL, ...){
1908
1909 if(is.null(quantileFunction)){
1910 return(object$scaledResiduals)
1911 } else {
1912 res = quantileFunction(object$scaledResiduals)
1913 if(!is.null(outlierValues)){
1914 res = ifelse(res == -Inf, outlierValues[1], res)
1915 res = ifelse(res == Inf, outlierValues[2], res)
1916 }
1917 return(res)
1918 }
1919 }
1920
1921
1922
1923 #' Return outliers
1924 #'
1925 #' Returns the outliers of a DHARMa object
1926 #'
1927 #' @param object an object with simulated residuals created by \code{\link{simulateResiduals}}
1928 #' @param lowerQuantile lower threshold for outliers. Default is zero = outside simulation envelope
1929 #' @param upperQuantile upper threshold for outliers. Default is 1 = outside simulation envelope
1930 #' @param return wheter to return an indices of outliers or a logical vector
1931 #'
1932 #' @details First of all, note that the standard definition of outlier in the DHARMa plots and outlier tests is an observation that is outside the simulation envelope. How far outside that is depends a lot on how many simulations you do. If you have 100 data points and to 100 simulations, you would expect to have one "outlier" on average, even with a perfectly fitting model. This is in fact what the outlier test tests.
1933 #'
1934 #' Thus, keep in mind that for a small number of simulations, outliers are mostly a technical term: these are points that are outside our simulations, but we don't know how far away they are.
1935 #'
1936 #' If you are seriously interested in HOW FAR outside the expected distribution a data point is, you should increase the number of simulations in \code{\link{simulateResiduals}} to be sure to get the tail of the data distribution correctly. In this case, it may make sense to adjust lowerQuantile and upperQuantile, e.g. to 0.025, 0.975, which would define outliers as values outside the central 95% of the distribution.
1937 #'
1938 #' Also, note that outliers are particularly concerning if they have a strong influence on the model fit. One could test the influence, for example, by removing them from the data, or by some meausures of leverage, e.g. generalisations for Cook's distance as in Pinho, L. G. B., Nobre, J. S., & Singer, J. M. (2015). Cook’s distance for generalized linear mixed models. Computational Statistics & Data Analysis, 82, 126–136. doi:10.1016/j.csda.2014.08.008. At the moment, however, no such function is provided in DHARMa.
1939 #'
1940 #' @export
1941 #'
1942 outliers <- function(object, lowerQuantile = 0, upperQuantile = 1, return = c("index", "logical")){
1943
1944 return = match.arg(return)
1945
1946 out = residuals(object) >= upperQuantile | residuals(object) <= lowerQuantile
1947
1948 if(return == "logical") return(out)
1949 else(return(which(out)))
1950 }
1951
1952
1953
1954 #' Create a DHARMa object from hand-coded simulations or Bayesian posterior predictive simulations
1955 #'
1956 #' @param simulatedResponse matrix of observations simulated from the fitted model - row index for observations and colum index for simulations
1957 #' @param observedResponse true observations
1958 #' @param fittedPredictedResponse optional fitted predicted response. For Bayesian posterior predictive simulations, using the median posterior prediction as fittedPredictedResponse is recommended. If not provided, the mean simulatedResponse will be used.
1959 #' @param integerResponse if T, noise will be added at to the residuals to maintain a uniform expectations for integer responses (such as Poisson or Binomial). Unlike in \code{\link{simulateResiduals}}, the nature of the data is not automatically detected, so this MUST be set by the user appropriately
1960 #' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus teh same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details.
1961 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}}
1962 #' @details The use of this function is to convert simulated residuals (e.g. from a point estimate, or Bayesian p-values) to a DHARMa object, to make use of the plotting / test functions in DHARMa
1963 #' @note Either scaled residuals or (simulatedResponse AND observed response) have to be provided
1964 #' @example inst/examples/createDharmaHelp.R
1965 #' @export
1966 createDHARMa <- function(simulatedResponse , observedResponse , fittedPredictedResponse = NULL, integerResponse = F, seed = 123, method = c("PIT", "traditional")){
1967
1968 randomState <-getRandomState(seed)
1969 on.exit({randomState$restoreCurrent()})
1970 match.arg(method)
1971
1972 out = list()
1973 out$simulatedResponse = simulatedResponse
1974 out$refit = F
1975 out$integerResponse = integerResponse
1976 out$observedResponse = observedResponse
1977
1978 if(!is.matrix(simulatedResponse) & !is.null(observedResponse)) stop("either scaled residuals or simulations and observations have to be provided")
1979 if(ncol(simulatedResponse) < 2) stop("simulatedResponse with less than 2 simulations provided - cannot calculate residuals on that.")
1980
1981 if(ncol(simulatedResponse) < 10) warning("simulatedResponse with less than 10 simulations provided. This rarely makes sense")
1982
1983 out$nObs = length(observedResponse)
1984
1985 if (out$nObs < 3) stop("warning - number of observations < 3 ... this rarely makes sense")
1986
1987 if(! (out$nObs == nrow(simulatedResponse))) stop("dimensions of observedResponse and simulatedResponse do not match")
1988
1989 out$nSim = ncol(simulatedResponse)
1990
1991 out$scaledResiduals = getQuantile(simulations = simulatedResponse , observed = observedResponse , integerResponse = integerResponse, method = method)
1992
1993
1994 # makes sure that DHARM plots that rely on this vector won't crash
1995 if(is.null(fittedPredictedResponse)){
1996 message("No fitted predicted response provided, using the mean of the simulations")
1997 fittedPredictedResponse = apply(simulatedResponse, 1, mean)
1998 }
1999 out$fittedPredictedResponse = fittedPredictedResponse
2000 out$randomState = randomState
2001 class(out) = "DHARMa"
2002 return(out)
2003 }
2004
2005
2006 #' Ensures that an object is of class DHARMa
2007 #'
2008 #' @param simulationOutput a DHARMa simulation output or an object that can be converted into a DHARMa simulation output
2009 #' @param convert if TRUE, attempts to convert model + numeric to DHARMa, if "Model", converts only supported models to DHARMa
2010 #' @details The
2011 #' @return an object of class DHARMa
2012 #' @keywords internal
2013 ensureDHARMa <- function(simulationOutput,
2014 convert = F){
2015
2016 if(inherits(simulationOutput, "DHARMa")){
2017 return(simulationOutput)
2018 } else {
2019
2020 if(convert == FALSE) stop("wrong argument to function, simulationOutput must be a DHARMa object!")
2021 else {
2022
2023 if (class(simulationOutput)[1] %in% getPossibleModels()){
2024 if (convert == "Model" | convert == T) return(simulateResiduals(simulationOutput))
2025 } else if(is.vector(simulationOutput, mode = "numeric") & convert == T) {
2026 out = list()
2027 out$scaledResiduals = simulationOutput
2028 out$nObs = length(out$scaledResiduals)
2029 class(out) = "DHARMa"
2030 return(out)
2031 }
2032 }
2033 }
2034 stop("wrong argument to function, simulationOutput must be a DHARMa object or a numeric vector of quantile residuals!")
2035 }
2036
2037 ####################### DHARMa.R
2038
2039 ####################### tests.R
2040
2041 #' DHARMa general residual test
2042 #'
2043 #' Calls both uniformity and dispersion test
2044 #'
2045 #' This function is a wrapper for the various test functions implemented in DHARMa. Currently, this function calls the \code{\link{testUniformity}} and the \code{\link{testDispersion}} functions. All other tests (see list below) have to be called by hand.
2046 #'
2047 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2048 #' @param plot if T, plots functions of the tests are called
2049 #' @author Florian Hartig
2050 #' @seealso \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2051 #' @example inst/examples/testsHelp.R
2052 #' @export
2053 testResiduals <- function(simulationOutput, plot = T){
2054
2055 opar = par(mfrow = c(1,3))
2056 on.exit(par(opar))
2057 out = list()
2058 out$uniformity = testUniformity(simulationOutput, plot = plot)
2059 out$dispersion = testDispersion(simulationOutput, plot = plot)
2060 out$outliers = testOutliers(simulationOutput, plot = plot)
2061
2062 print(out)
2063 return(out)
2064 }
2065
2066 #' Residual tests
2067 #'
2068 #' @details Deprecated, switch your code to using the \code{\link{testResiduals}} function
2069 #'
2070 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2071 #' @author Florian Hartig
2072 #' @export
2073 testSimulatedResiduals <- function(simulationOutput){
2074 message("testSimulatedResiduals is deprecated, switch your code to using the testResiduals function")
2075 testResiduals(simulationOutput)
2076 }
2077
2078
2079 #' Test for overall uniformity
2080 #'
2081 #' This function tests the overall uniformity of the simulated residuals in a DHARMa object
2082 #'
2083 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2084 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis. See \code{\link[stats]{ks.test}} for details
2085 #' @param plot if T, plots calls \code{\link{plotQQunif}} as well
2086 #' @details The function applies a \code{\link[stats]{ks.test}} for uniformity on the simulated residuals.
2087 #' @author Florian Hartig
2088 #' @seealso \code{\link{testResiduals}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2089 #' @example inst/examples/testsHelp.R
2090 #' @export
2091 testUniformity<- function(simulationOutput, alternative = c("two.sided", "less", "greater"), plot = T){
2092
2093 simulationOutput = ensureDHARMa(simulationOutput, convert = T)
2094
2095 out <- suppressWarnings(ks.test(simulationOutput$scaledResiduals, 'punif', alternative = alternative))
2096 if(plot == T) plotQQunif(simulationOutput = simulationOutput)
2097 return(out)
2098 }
2099
2100
2101 # Experimental
2102 testBivariateUniformity<- function(simulationOutput, alternative = c("two.sided", "less", "greater"), plot = T){
2103
2104 simulationOutput = ensureDHARMa(simulationOutput, convert = T)
2105
2106 #out <- suppressWarnings(ks.test(simulationOutput$scaledResiduals, 'punif', alternative = alternative))
2107 #if(plot == T) plotQQunif(simulationOutput = simulationOutput)
2108 out = NULL
2109 return(out)
2110 }
2111
2112
2113
2114 #' Test for quantiles
2115 #'
2116 #' This function tests
2117 #'
2118 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2119 #' @param predictor an optional predictor variable to be used, instead of the predicted response (default)
2120 #' @param quantiles the quantiles to be tested
2121 #' @param plot if T, the function will create an additional plot
2122 #' @details The function fits quantile regressions (via package qgam) on the residuals, and compares their location to the expected location (because of the uniform distributionm, the expected location is 0.5 for the 0.5 quantile).
2123 #'
2124 #' A significant p-value for the splines means the fitted spline deviates from a flat line at the expected location (p-values of intercept and spline are combined via Benjamini & Hochberg adjustment to control the FDR)
2125 #'
2126 #' The p-values of the splines are combined into a total p-value via Benjamini & Hochberg adjustment to control the FDR.
2127 #'
2128 #' @author Florian Hartig
2129 #' @example inst/examples/testQuantilesHelp.R
2130 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testOutliers}}
2131 #' @export
2132 testQuantiles <- function(simulationOutput, predictor = NULL, quantiles = c(0.25,0.5,0.75), plot = T){
2133
2134 if(plot == F){
2135
2136 out = list()
2137 out$data.name = deparse(substitute(simulationOutput))
2138
2139 simulationOutput = ensureDHARMa(simulationOutput, convert = T)
2140 res = simulationOutput$scaledResiduals
2141 pred = ensurePredictor(simulationOutput, predictor)
2142
2143 dat=data.frame(res = simulationOutput$scaledResiduals , pred = pred)
2144
2145 quantileFits <- list()
2146 pval = rep(NA, length(quantiles))
2147 predictions = data.frame(pred = sort(dat$pred))
2148 predictions = cbind(predictions, matrix(ncol = 2 * length(quantiles), nrow = nrow(dat)))
2149 for(i in 1:length(quantiles)){
2150 datTemp = dat
2151 datTemp$res = datTemp$res - quantiles[i]
2152
2153 # settings for k = the dimension of the basis used to represent the smooth term.
2154 # see https://github.com/mfasiolo/qgam/issues/37
2155 dimSmooth = min(length(unique(datTemp$pred)), 10)
2156 quantResult = try(capture.output(quantileFits[[i]] <- qgam::qgam(res ~ s(pred, k = dimSmooth) , data =datTemp, qu = quantiles[i])), silent = T)
2157 if(inherits(quantResult, "try-error")){
2158 message("Unable to calculate quantile regression for quantile ", quantiles[i], ". Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.")
2159 } else {
2160 x = summary(quantileFits[[i]])
2161 pval[i] = min(p.adjust(c(x$p.table[1,4], x$s.table[1,4]), method = "BH")) # correction for test on slope and intercept
2162 quantPre = predict(quantileFits[[i]], newdata = predictions, se = T)
2163 predictions[, 2*i] = quantPre$fit + quantiles[i]
2164 predictions[, 2*i + 1] = quantPre$se.fit
2165 }
2166 }
2167
2168 out$method = "Test for location of quantiles via qgam"
2169 out$alternative = "both"
2170 out$pvals = pval
2171 out$p.value = min(p.adjust(pval, method = "BH")) # correction for multiple quantile tests
2172 out$predictions = predictions
2173 out$qgamFits = quantileFits
2174
2175 class(out) = "htest"
2176
2177 } else if(plot == T) {
2178 out <- plotResiduals(simulationOutput = simulationOutput, predictor = predictor, quantiles = quantiles)
2179 }
2180 return(out)
2181 }
2182
2183
2184 #unif.2017YMi(X, type = c("Q1", "Q2", "Q3"), lower = rep(0, ncol(X)),upper = rep(1, ncol(X)))
2185
2186 #' Test for outliers
2187 #'
2188 #' This function tests if the number of observations outside the simulatio envelope are larger or smaller than expected
2189 #'
2190 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2191 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" (default) compared to the simulated null hypothesis
2192 #' @param margin whether to test for outliers only at the lower, only at the upper, or both sides (default) of the simulated data distribution
2193 #' @param plot if T, the function will create an additional plot
2194 #' @details DHARMa residuals are created by simulating from the fitted model, and comparing the simulated values to the observed data. It can occur that all simulated values are higher or smaller than the observed data, in which case they get the residual value of 0 and 1, respectively. I refer to these values as simulation outliers, or simply outliers.
2195 #'
2196 #' Because no data was simulated in the range of the observed value, we don't know "how strongly" these values deviate from the model expectation, so the term "outlier" should be used with a grain of salt - it's not a judgment about the magnitude of a deviation from an expectation, but simply that we are outside the simulated range, and thus cannot say anything more about the location of the residual.
2197 #'
2198 #' Note also that the number of outliers will decrease as we increase the number of simulations. Under the null hypothesis that the model is correct, we expect nData /(nSim +1) outliers at each margin of the distribution. For a reason, consider that if the data and the model distribution are identical, the probability that a given observation is higher than all simulations is 1/(nSim +1).
2199 #'
2200 #' Based on this null expectation, we can test for an excess or lack of outliers. Per default, testOutliers() looks for both, so if you get a significant p-value, you have to check if you have to many or too few outliers. An excess of outliers is to be interpreted as too many values outside the simulation envelope. This could be caused by overdispersion, or by what we classically call outliers. A lack of outliers would be caused, for example, by underdispersion.
2201 #'
2202 #'
2203 #' @author Florian Hartig
2204 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2205 #' @example inst/examples/testOutliersHelp.R
2206 #' @export
2207 testOutliers <- function(simulationOutput, alternative = c("two.sided", "greater", "less"), margin = c("both", "upper", "lower"), plot = T){
2208
2209 # check inputs
2210 alternative = match.arg(alternative)
2211 margin = match.arg(margin)
2212 data.name = deparse(substitute(simulationOutput)) # remember: needs to be called before ensureDHARMa
2213 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model")
2214
2215 # calculation of outliers
2216 if(margin == "both") outliers = sum(simulationOutput$scaledResiduals %in% c(0, 1))
2217 if(margin == "upper") outliers = sum(simulationOutput$scaledResiduals == 1)
2218 if(margin == "lower") outliers = sum(simulationOutput$scaledResiduals == 0)
2219
2220 # calculations of trials and H0
2221 outFreqH0 = 1/(simulationOutput$nSim +1) * ifelse(margin == "both", 2, 1)
2222 trials = simulationOutput$nObs
2223
2224 out = binom.test(outliers, trials, p = outFreqH0, alternative = alternative)
2225
2226 # overwrite information in binom.test
2227
2228 out$data.name = data.name
2229 out$margin = margin
2230 out$method = "DHARMa outlier test based on exact binomial test"
2231
2232 names(out$statistic) = paste("outliers at", margin, "margin(s)")
2233 names(out$parameter) = "simulations"
2234 names(out$estimate) = paste("frequency of outliers (expected:", out$null.value,")")
2235
2236 out$interval = "tst"
2237
2238 out$interval =
2239
2240 if(plot == T) {
2241
2242 hist(simulationOutput, main = "")
2243
2244 main = ifelse(out$p.value <= 0.05,
2245 "Outlier test significant",
2246 "Outlier test n.s.")
2247
2248 title(main = main, cex.main = 1,
2249 col.main = ifelse(out$p.value <= 0.05, "red", "black"))
2250
2251 # legend("center", c(paste("p=", round(out$p.value, digits = 5)), paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(out$p.value < 0.05, "red", "black" ))
2252
2253 }
2254 return(out)
2255 }
2256
2257
2258 #' DHARMa dispersion tests
2259 #'
2260 #' This function performs a simulation-based test for over/underdispersion
2261 #'
2262 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2263 #' @param plot whether to plot output
2264 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis. Greater corresponds to overdispersion.
2265 #' @param ... arguments to pass on to \code{\link{testGeneric}}
2266 #' @details The function implements two tests, depending on whether it is applied on a simulation with refit = F, or refit = T.
2267 #'
2268 #' If refit = F, the function tests the sd of the data against the sd of the simulated data.
2269 #'
2270 #' If refit = T, the function compares the approximate deviance (via squared pearson residuals) with the same quantity from the models refitted with simulated data. Applying this is much slower than the previous alternative. Given the computational cost, I would suggest that most users will be satisfied with the standard dispersion test.
2271 #'
2272 #' @note The results of the dispersion test can can differ depending on whether it is evaluated on conditional (= conditional on fitted random effects) or unconditional (= REs are re-simulated) simulations. You can change between conditional or unconditional simulations in \code{\link{simulateResiduals}} if this is supported by the regression package that you use. The default in DHARMa is to use unconditional simulations, but I have often found that conditional simulations are more sensitive to dispersion problems. I recommend trying both, as neither test should be positive if the dispersion is correct.
2273 #'
2274 #' @author Florian Hartig
2275 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2276 #' @example inst/examples/testsHelp.R
2277 #' @export
2278 testDispersion <- function(simulationOutput, alternative = c("two.sided", "greater", "less"), plot = T, ...){
2279
2280 out = list()
2281 out$data.name = deparse(substitute(simulationOutput))
2282
2283 alternative <- match.arg(alternative)
2284
2285 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model")
2286
2287 if(simulationOutput$refit == F){
2288
2289 spread <- function(x) sd(x - simulationOutput$fittedPredictedResponse)
2290 out = testGeneric(simulationOutput, summary = spread, alternative = alternative, methodName = "DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated", plot = plot, ...)
2291 } else {
2292
2293 observed = tryCatch(sum(residuals(simulationOutput$fittedModel, type = "pearson")^2), error = function(e) {
2294 message(paste("DHARMa: the requested tests requires pearson residuals, but your model does not implement these calculations. Test will return NA. Error message:", e))
2295 return(NA)
2296 })
2297 if(is.na(observed)) return(NA)
2298 expected = apply(simulationOutput$refittedPearsonResiduals^2 , 2, sum)
2299 out$statistic = c(dispersion = observed / mean(expected))
2300 out$method = "DHARMa nonparametric dispersion test via mean deviance residual fitted vs. simulated-refitted"
2301
2302 p = getP(simulated = expected, observed = observed, alternative = alternative)
2303
2304 out$alternative = alternative
2305 out$p.value = p
2306 class(out) = "htest"
2307
2308 if(plot == T) {
2309 #plotTitle = gsub('(.{1,50})(\\s|$)', '\\1\n', out$method)
2310 xLabel = paste("Simulated values, red line = fitted model. p-value (",out$alternative, ") = ", out$p.value, sep ="")
2311
2312 hist(expected, xlim = range(expected, observed, na.rm=T ), col = "lightgrey", main = "", xlab = xLabel, breaks = 20, cex.main = 1)
2313 abline(v = observed, lwd= 2, col = "red")
2314
2315 main = ifelse(out$p.value <= 0.05,
2316 "Dispersion test significant",
2317 "Dispersion test n.s.")
2318
2319 title(main = main, cex.main = 1,
2320 col.main = ifelse(out$p.value <= 0.05, "red", "black"))
2321 }
2322 }
2323
2324 return(out)
2325 }
2326
2327 #' Simulated overdisperstion tests
2328 #'
2329 #' @details Deprecated, switch your code to using the \code{\link{testDispersion}} function
2330 #'
2331 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2332 #' @param ... additional arguments to \code{\link{testDispersion}}
2333 #' @export
2334 testOverdispersion <- function(simulationOutput, ...){
2335 message("testOverdispersion is deprecated, switch your code to using the testDispersion function")
2336 testDispersion(simulationOutput, ...)
2337 }
2338
2339 #' Parametric overdisperstion tests
2340 #'
2341 #' @details Deprecated, switch your code to using the \code{\link{testDispersion}} function. The function will do nothing, arguments will be ignored, the parametric tests is no longer recommend
2342 #'
2343 #' @param ... arguments will be ignored, the parametric tests is no longer recommend
2344 #' @export
2345 testOverdispersionParametric <- function(...){
2346 message("testOverdispersionParametric is deprecated and no longer recommended, see release notes in DHARMA 0.2.0 - switch your code to using the testDispersion function")
2347 return(0)
2348 }
2349
2350
2351 #' Tests for zero-inflation
2352 #'
2353 #' This function compares the observed number of zeros with the zeros expected from simulations.
2354 #'
2355 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2356 #' @param ... further arguments to \code{\link{testGeneric}}
2357 #' @details The plot shows the expected distribution of zeros against the observed values, the ratioObsSim shows observed vs. simulated zeros. A value < 1 means that the observed data has less zeros than expected, a value > 1 means that it has more zeros than expected (aka zero-inflation). Per default, the function tests both sides.
2358 #'
2359 #' Some notes about common problems / questions:
2360 #'
2361 #' * Zero-inflation tests after fitting the model are crucial to see if you have zero-inflation. Just because there are a lot of zeros doesn't mean you have zero-inflation, see Warton, D. I. (2005). Many zeros does not mean zero inflation: comparing the goodness-of-fit of parametric models to multivariate abundance data. Environmetrics 16(3), 275-289.
2362 #'
2363 #' * That being said, zero-inflation tests are often not a reliable guide to decide wheter to add a zi term or not. In general, model structures should be decided on ideally a priori, if that is not possible via model selection techniques (AIC, BIC, WAIC, Bayes Factor). A zero-inflation test should only be run after that decision, and to validate the decision that was taken.
2364 #'
2365 #' @note This function is a wrapper for \code{\link{testGeneric}}, where the summary argument is set to function(x) sum(x == 0)
2366 #' @author Florian Hartig
2367 #' @example inst/examples/testsHelp.R
2368 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2369 #' @export
2370 testZeroInflation <- function(simulationOutput, ...){
2371 countZeros <- function(x) sum( x == 0)
2372 testGeneric(simulationOutput = simulationOutput, summary = countZeros, methodName = "DHARMa zero-inflation test via comparison to expected zeros with simulation under H0 = fitted model", ... )
2373 }
2374
2375
2376 #' Generic simulation test of a summary statistic
2377 #'
2378 #' This function tests if a user-defined summary differs when applied to simulated / observed data.
2379 #'
2380 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2381 #' @param summary a function that can be applied to simulated / observed data. See examples below
2382 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis
2383 #' @param plot whether to plot the simulated summary
2384 #' @param methodName name of the test (will be used in plot)
2385 #'
2386 #' @details This function tests if a user-defined summary differs when applied to simulated / observed data. the function can easily be remodeled to apply summaries on the residuals, by simply defining f = function(x) summary (x - predictions), as done in \code{\link{testDispersion}}
2387 #'
2388 #' @note The function that you supply is applied on the data as it is represented in your fitted model, which may not always correspond to how you think. This is important in particular when you use k/n binomial data, and want to test for 1-inflation. As an example, if have k/20 observations, and you provide your data via cbind (y, y-20), you have to test for 20-inflation (because this is how the data is represented in the model). However, if you provide data via y/20, and weights = 20, you should test for 1-inflation. In doubt, check how the data is internally represented in model.frame(model), or via simulate(model)
2389 #'
2390 #' @export
2391 #' @author Florian Hartig
2392 #' @example inst/examples/testsHelp.R
2393 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2394 testGeneric <- function(simulationOutput, summary, alternative = c("two.sided", "greater", "less"), plot = T, methodName = "DHARMa generic simulation test"){
2395
2396 out = list()
2397 out$data.name = deparse(substitute(simulationOutput))
2398
2399 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model")
2400
2401 alternative <- match.arg(alternative)
2402
2403 observed = summary(simulationOutput$observedResponse)
2404
2405 simulated = apply(simulationOutput$simulatedResponse, 2, summary)
2406
2407 p = getP(simulated = simulated, observed = observed, alternative = alternative)
2408
2409 out$statistic = c(ratioObsSim = observed / mean(simulated))
2410 out$method = methodName
2411 out$alternative = alternative
2412 out$p.value = p
2413
2414
2415 class(out) = "htest"
2416
2417 if(plot == T) {
2418 plotTitle = gsub('(.{1,50})(\\s|$)', '\\1\n', methodName)
2419 xLabel = paste("Simulated values, red line = fitted model. p-value (",out$alternative, ") = ", out$p.value, sep ="")
2420 hist(simulated, xlim = range(simulated, observed, na.rm=T ), col = "lightgrey", main = plotTitle, xlab = xLabel, breaks = max(round(simulationOutput$nSim / 5), 20), cex.main = 0.8)
2421 abline(v = observed, lwd= 2, col = "red")
2422 }
2423 return(out)
2424 }
2425
2426
2427 #' Test for temporal autocorrelation
2428 #'
2429 #' This function performs a standard test for temporal autocorrelation on the simulated residuals
2430 #'
2431 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}}
2432 #' @param time the time, in the same order as the data points. If not provided, random values will be created
2433 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis
2434 #' @param plot whether to plot output
2435 #' @details The function performs a Durbin-Watson test on the uniformly scaled residuals, and plots the residuals against time. The DB test was originally be designed for normal residuals. In simulations, I didn't see a problem with this setting though. The alternative is to transform the uniform residuals to normal residuals and perform the DB test on those.
2436 #'
2437 #' If no time values are provided, random values will be created. The sense of being able to run the test with time = NULL (random values) is to test the rate of false positives under the current residual structure (random time corresponds to H0: no spatial autocorrelation), e.g. to check if the test has noninal error rates for particular residual structures (note that Durbin-Watson originally assumes normal residuals, error rates seem correct for uniform residuals, but may not be correct if there are still other residual problems).
2438 #'
2439 #' Testing for temporal autocorrelation requires unique time values - if you have several observations per time value, either use the recalculateResiduals function to aggregate residuals per time step, or extract the residuals from the fitted object, and plot / test each of them independently for temporally repeated subgroups (typical choices would be location / subject etc.). Note that the latter must be done by hand, outside testSpatialAutocorrelation.
2440 #'
2441 #' @note Important to note for all autocorrelation tests (spatial / temporal): the autocorrelation tests are valid to check for residual autocorrelation in models that don't assume such a correlation (in this case, you can use conditional or unconditional simulations), or if there is remaining residual autocorrelation after accounting for it in a spatial/temporal model (in that case, you have to use conditional simulations), but if checking unconditional simulations from a model with an autocorrelation structure on data that corresponds to this model, they will be significant, even if the model fully accounts for this structure.
2442 #'
2443 #' This behavior is not really a bug, but rather originates from the definition of the quantile residuals: quantile residuals are calculated independently per data point, i.e. without consideratin of any correlation structure between data points that may exist in the simulations. As a result, the simulated distributions from a unconditional simulaton will typically not reflect the correlation structure that is present in each single simulation, and the same is true for the subsequently calculated quantile residuals.
2444 #'
2445 #' The bottomline here is that spatial / temporal / other autoregressive models should either be tested based on conditional simulations, or (ideally) custom tests should be used that are not based on quantile residuals, but rather compare the correlation structure in the simulated data with the correlation structure in the observed data.
2446 #'
2447 #' @author Florian Hartig
2448 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2449 #' @example inst/examples/testTemporalAutocorrelationHelp.R
2450 #' @export
2451 testTemporalAutocorrelation <- function(simulationOutput, time = NULL , alternative = c("two.sided", "greater", "less"), plot = T){
2452
2453 simulationOutput = ensureDHARMa(simulationOutput, convert = T)
2454
2455 # actually not sure if this is neccessary for dwtest, but seems better to aggregate
2456 if(any(duplicated(time))) stop("testing for temporal autocorrelation requires unique time values - if you have several observations per time value, either use the recalculateResiduals function to aggregate residuals per time step, or extract the residuals from the fitted object, and plot / test each of them independently for temporally repeated subgroups (typical choices would be location / subject etc.). Note that the latter must be done by hand, outside testSpatialAutocorrelation.")
2457
2458 alternative <- match.arg(alternative)
2459
2460 if(is.null(time)){
2461 time = sample.int(simulationOutput$nObs, simulationOutput$nObs)
2462 message("DHARMa::testTemporalAutocorrelation - no time argument provided, using random times for each data point")
2463 }
2464
2465 out = lmtest::dwtest(simulationOutput$scaledResiduals ~ 1, order.by = time, alternative = alternative)
2466
2467 if(plot == T) {
2468 oldpar <- par(mfrow = c(1,2))
2469 on.exit(par(oldpar))
2470
2471 plot(simulationOutput$scaledResiduals[order(time)] ~ time[order(time)],
2472 type = "l", ylab = "Scaled residuals", xlab = "Time", main = "Residuals vs. time")
2473 acf(simulationOutput$scaledResiduals[order(time)], main = "Autocorrelation")
2474 legend("topright",
2475 c(paste(out$method, " p=", round(out$p.value, digits = 5)),
2476 paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))),
2477 text.col = ifelse(out$p.value < 0.05, "red", "black" ), bty="n")
2478
2479 }
2480
2481 return(out)
2482 }
2483
2484
2485 #' Test for spatial autocorrelation
2486 #'
2487 #' This function performs a standard test for spatial autocorrelation on the simulated residuals
2488 #'
2489 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2490 #' @param x the x coordinate, in the same order as the data points. If not provided, random values will be created
2491 #' @param y the y coordinate, in the same order as the data points. If not provided, random values will be created
2492 #' @param distMat optional distance matrix. If not provided, a distance matrix will be calculated based on x and y. See details for explanation
2493 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis
2494 #' @param plot whether to plot output
2495 #' @details The function performs Moran.I test from the package ape, based on the provided distance matrix of the data points.
2496 #'
2497 #' There are several ways to specify this distance. If a distance matrix (distMat) is provided, calculations will be based on this distance matrix, and x,y coordinates will only used for the plotting (if provided)
2498 #' If distMat is not provided, the function will calculate the euclidian distances between x,y coordinates, and test Moran.I based on these distances.
2499 #'
2500 #' If no x/y values are provided, random values will be created. The sense of being able to run the test with x/y = NULL (random values) is to test the rate of false positives under the current residual structure (random x/y corresponds to H0: no spatial autocorrelation), e.g. to check if the test has nominal error rates for particular residual structures.
2501 #'
2502 #' Testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, either use the recalculateResiduals function to aggregate residuals per location, or extract the residuals from the fitted object, and plot / test each of them independently for spatially repeated subgroups (a typical scenario would repeated spatial observation, in which case one could plot / test each time step separately for temporal autocorrelation). Note that the latter must be done by hand, outside testSpatialAutocorrelation.
2503 #'
2504 #' @note Important to note for all autocorrelation tests (spatial / temporal): the autocorrelation tests are valid to check for residual autocorrelation in models that don't assume such a correlation (in this case, you can use conditional or unconditional simulations), or if there is remaining residual autocorrelation after accounting for it in a spatial/temporal model (in that case, you have to use conditional simulations), but if checking unconditional simulations from a model with an autocorrelation structure on data that corresponds to this model, they will be significant, even if the model fully accounts for this structure.
2505 #'
2506 #' This behavior is not really a bug, but rather originates from the definition of the quantile residuals: quantile residuals are calculated independently per data point, i.e. without consideratin of any correlation structure between data points that may exist in the simulations. As a result, the simulated distributions from a unconditional simulaton will typically not reflect the correlation structure that is present in each single simulation, and the same is true for the subsequently calculated quantile residuals.
2507 #'
2508 #' The bottomline here is that spatial / temporal / other autoregressive models should either be tested based on conditional simulations, or (ideally) custom tests should be used that are not based on quantile residuals, but rather compare the correlation structure in the simulated data with the correlation structure in the observed data.
2509 #'
2510 #' @author Florian Hartig
2511 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testQuantiles}}
2512 #' @import grDevices
2513 #' @example inst/examples/testSpatialAutocorrelationHelp.R
2514 #' @export
2515 testSpatialAutocorrelation <- function(simulationOutput, x = NULL, y = NULL, distMat = NULL, alternative = c("two.sided", "greater", "less"), plot = T){
2516
2517 alternative <- match.arg(alternative)
2518 data.name = deparse(substitute(simulationOutput)) # needs to be before ensureDHARMa
2519 simulationOutput = ensureDHARMa(simulationOutput, convert = T)
2520
2521 if(any(duplicated(cbind(x,y)))) stop("testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, either use the recalculateResiduals function to aggregate residuals per location, or extract the residuals from the fitted object, and plot / test each of them independently for spatially repeated subgroups (a typical scenario would repeated spatial observation, in which case one could plot / test each time step separately for temporal autocorrelation). Note that the latter must be done by hand, outside testSpatialAutocorrelation.")
2522
2523 if( (!is.null(x) | !is.null(y)) & !is.null(distMat) ) message("both coordinates and distMat provided, calculations will be done based on the distance matrix, coordinates will only be used for plotting")
2524 # if not provided, fill x and y with random numbers (Null model)
2525 if(is.null(x)){
2526 x = runif(simulationOutput$nObs, -1,1)
2527 message("DHARMa::testSpatialAutocorrelation - no x coordinates provided, using random values for each data point")
2528 }
2529
2530 if(is.null(y)){
2531 y = runif(simulationOutput$nObs, -1,1)
2532 message("DHARMa::testSpatialAutocorrelation - no x coordinates provided, using random values for each data point")
2533 }
2534
2535 # if not provided, create distance matrix based on x and y
2536 if(is.null(distMat)) distMat <- as.matrix(dist(cbind(x, y)))
2537
2538 invDistMat <- 1/distMat
2539 diag(invDistMat) <- 0
2540
2541 MI = ape::Moran.I(simulationOutput$scaledResiduals, weight = invDistMat, alternative = alternative)
2542
2543 out = list()
2544 out$statistic = c(observed = MI$observed, expected = MI$expected, sd = MI$sd)
2545 out$method = "DHARMa Moran's I test for spatial autocorrelation"
2546 out$alternative = "Spatial autocorrelation"
2547 out$p.value = MI$p.value
2548 out$data.name = data.name
2549
2550 class(out) = "htest"
2551
2552
2553
2554 if(plot == T) {
2555 opar <- par(mfrow = c(1,1))
2556 on.exit(par(opar))
2557
2558 col = colorRamp(c("red", "white", "blue"))(simulationOutput$scaledResiduals)
2559 plot(x,y, col = rgb(col, maxColorValue = 255), main = out$method, cex.main = 0.8 )
2560
2561 # TODO implement correlogram
2562 }
2563
2564 if(plot == T) {
2565
2566
2567 }
2568 return(out)
2569 }
2570
2571
2572 getP <- function(simulated, observed, alternative){
2573
2574 if(alternative == "greater") p = mean(simulated >= observed)
2575 if(alternative == "less") p = mean(simulated <= observed)
2576 if(alternative == "two.sided") p = min(min(mean(simulated <= observed), mean(simulated >= observed) ) * 2,1)
2577
2578 return(p)
2579 }
2580
2581
2582
2583 ####################### tests.R
2584
2585 ####################### compatibility.R
2586
2587
2588 # New S3 methods
2589
2590 #' Get model response
2591 #'
2592 #' Extract the response of a fitted model
2593 #'
2594 #' The purpose of this function is to savely extract the response (dependent variable) of the fitted model classes
2595 #'
2596 #' @param object a fitted model
2597 #' @param ... additional parameters
2598 #'
2599 #' @example inst/examples/wrappersHelp.R
2600 #'
2601 #' @seealso \code{\link{getRefit}}, \code{\link{getSimulations}}, \code{\link{getFixedEffects}}, \code{\link{getFitted}}
2602 #' @author Florian Hartig
2603 #' @export
2604 getObservedResponse <- function (object, ...) {
2605 UseMethod("getObservedResponse", object)
2606 }
2607
2608 #' @rdname getObservedResponse
2609 #' @export
2610 getObservedResponse.default <- function (object, ...){
2611 out = model.frame(object)[,1]
2612
2613 # check for weights in k/n case
2614 if(family(object)$family %in% c("binomial", "betabinomial") & "(weights)" %in% colnames(model.frame(object))){
2615 x = model.frame(object)
2616 out = out * x$`(weights)`
2617 }
2618
2619 # check for k/n binomial
2620 if(is.matrix(out)){
2621 if(!(ncol(out) == 2)) securityAssertion("nKcase - wrong dimensions of response")
2622 if(!(family(object)$family %in% c("binomial", "betabinomial"))) securityAssertion("nKcase - wrong family")
2623
2624 out = out[,1]
2625 }
2626
2627 # observation is factor - unlike lme4 and older, glmmTMB simulates nevertheless as numeric
2628 if(is.factor(out)) out = as.numeric(out) - 1
2629
2630 return(out)
2631 }
2632
2633 weightsWarning = "Model was fit with prior weights. These will be ignored in the simulation. See ?getSimulations for details"
2634
2635 #' Get model simulations
2636 #'
2637 #' Wrapper to simulate from a fitted model
2638 #'
2639 #' The purpose of this wrapper for for the simulate function is to return the simulations from a model in a standardized way
2640 #'
2641 #' @param object a fitted model
2642 #' @param nsim number of simulations
2643 #' @param type if simulations should be prepared for getQuantile or for refit
2644 #' @param ... additional parameters to be passed on, usually to the simulate function of the respective model class
2645 #'
2646 #' @return a matrix with simulations
2647 #' @example inst/examples/wrappersHelp.R
2648 #'
2649 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getRefit}}, \code{\link{getFixedEffects}}, \code{\link{getFitted}}
2650 #'
2651 #' @details The function is a wrapper for for the simulate function is to return the simulations from a model in a standardized way.
2652 #'
2653 #' Note: if the model was fit with weights, the function will throw a warning if used with a model class whose simulate function does not include the weightsi in the simulations. Note that the results may or may not be appropriate in this case, depending on how you use the weights.
2654 #'
2655 #'
2656 #' @author Florian Hartig
2657 #' @export
2658 getSimulations <- function (object, nsim = 1 , type = c("normal", "refit"), ...) {
2659 UseMethod("getSimulations", object)
2660 }
2661
2662 #' @rdname getSimulations
2663 #' @export
2664 getSimulations.default <- function (object, nsim = 1, type = c("normal", "refit"), ...){
2665
2666 type <- match.arg(type)
2667
2668 out = simulate(object, nsim = nsim , ...)
2669
2670 if (type == "normal"){
2671 if(family(object)$family %in% c("binomial", "betabinomial")){
2672 if("(weights)" %in% colnames(model.frame(object))){
2673 x = model.frame(object)
2674 out = out * x$`(weights)`
2675 } else if (is.matrix(out[[1]])){
2676 # this is for the k/n binomial case
2677 out = as.matrix(out)[,seq(1, (2*nsim), by = 2)]
2678 } else if(is.factor(out[[1]])){
2679 if(nlevels(out[[1]]) != 2){
2680 warning("The fitted model has a factorial response with number of levels not equal to 2 - there is currently no sensible application in DHARMa that would lead to this situation. Likely, you are trying something that doesn't work.")
2681 }
2682 else{
2683 out = data.matrix(out) - 1
2684 }
2685 }
2686 }
2687
2688 if(!is.matrix(out)) out = data.matrix(out)
2689 } else{
2690 if(family(object)$family %in% c("binomial", "betabinomial")){
2691 if (!is.matrix(out[[1]]) & !is.numeric(out)) data.frame(data.matrix(out)-1)
2692 }
2693 }
2694
2695 return(out)
2696 }
2697
2698
2699 #' Extract fixed effects of a supported model
2700 #'
2701 #' A wrapper to extract fixed effects of a supported model
2702 #'
2703 #' @param fittedModel a fitted model
2704 #'
2705 #' @example inst/examples/wrappersHelp.R
2706 #'
2707 #' @importFrom lme4 fixef
2708 #'
2709 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getRefit}}, \code{\link{getFitted}}
2710 #' @export
2711 getFixedEffects <- function(fittedModel){
2712
2713 if(class(fittedModel)[1] %in% c("glm", "lm", "gam", "bam", "negbin") ){
2714 out = coef(fittedModel)
2715 } else if(class(fittedModel)[1] %in% c("glmerMod", "lmerMod", "HLfit")){
2716 out = fixef(fittedModel)
2717 } else if(class(fittedModel)[1] %in% c("glmmTMB")){
2718 out = glmmTMB::fixef(fittedModel)
2719 out = out$cond
2720 } else {
2721 out = coef(fittedModel)
2722 if(is.null(out)) out = fixef(fittedModel)
2723 }
2724 return(out)
2725 }
2726
2727 #' Get model refit
2728 #'
2729 #' Wrapper to refit a fitted model
2730 #'
2731 #' @param object a fitted model
2732 #' @param newresp the new response that should be used to refit the model
2733 #' @param ... additional parameters to be passed on to the refit or update class that is used to refit the model
2734 #'
2735 #' @details The purpose of this wrapper is to standardize the refit of a model. The behavior of this function depends on the supplied model. When available, it uses the refit method, otherwise it will use update. For glmmTMB: since version 1.0, glmmTMB has a refit function, but this didn't work, so I switched back to this implementation, which is a hack based on the update function.
2736 #'
2737 #' @example inst/examples/wrappersHelp.R
2738 #'
2739 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getFixedEffects}}
2740 #' @author Florian Hartig
2741 #' @export
2742 getRefit <- function (object, newresp, ...) {
2743 UseMethod("getRefit", object)
2744 }
2745
2746 #' @rdname getRefit
2747 #'
2748 #' @importFrom lme4 refit
2749 #'
2750 #' @export
2751 getRefit.default <- function (object, newresp, ...){
2752 refit(object, newresp, ...)
2753 }
2754
2755 #' Get model fitted
2756 #'
2757 #' Wrapper to get the fitted value a fitted model
2758 #'
2759 #' The purpose of this wrapper is to standardize extract the fitted values
2760 #'
2761 #' @param object a fitted model
2762 #' @param ... additional parameters to be passed on, usually to the simulate function of the respective model class
2763 #'
2764 #' @example inst/examples/wrappersHelp.R
2765 #'
2766 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getRefit}}, \code{\link{getFixedEffects}}
2767 #'
2768 #' @author Florian Hartig
2769 #' @export
2770 getFitted <- function (object, ...) {
2771 UseMethod("getFitted", object)
2772 }
2773
2774 #' @rdname getFitted
2775 #' @export
2776 getFitted.default <- function (object,...){
2777 fitted(object, ...)
2778 }
2779
2780 #' has NA
2781 #'
2782 #' checks if the fitted model excluded NA values
2783 #'
2784 #' @param object a fitted model
2785 #'
2786 #' @details Checks if the fitted model excluded NA values
2787 #'
2788 #' @export
2789
2790
2791 # hasNA <- function(object){
2792 # x = rownames(model.frame(object))
2793 # if(length(x) < as.numeric(x[length(x) ])) return(TRUE)
2794 # else return(FALSE)
2795 # }
2796
2797 ######### LM #############
2798
2799 #' @rdname getRefit
2800 #' @export
2801 getRefit.lm <- function(object, newresp, ...){
2802
2803 newData <-model.frame(object)
2804
2805 if(is.vector(newresp)){
2806 newData[,1] = newresp
2807 } else if (is.factor(newresp)){
2808 # Hack to make the factor binomial case work
2809 newData[,1] = as.numeric(newresp) - 1
2810 } else {
2811 # Hack to make the binomial n/k case work
2812 newData[[1]] = NULL
2813 newData = cbind(newresp, newData)
2814 }
2815
2816 refittedModel = update(object, data = newData)
2817 return(refittedModel)
2818 }
2819
2820
2821 hasWeigths.lm <- function(object, ...){
2822 if(length(unique(object$prior.weights)) != 1) return(TRUE)
2823 else return(FALSE)
2824 }
2825
2826
2827 ######### GLM #############
2828
2829 #' @rdname getSimulations
2830 #' @export
2831 getSimulations.negbin<- function (object, nsim = 1, type = c("normal", "refit"), ...){
2832 if("(weights)" %in% colnames(model.frame(object))) warning(weightsWarning)
2833 getSimulations.default(object = object, nsim = nsim, type = type, ...)
2834 }
2835
2836
2837 ######## MGCV ############
2838
2839 # This function overwrites the standard fitted function for GAM
2840
2841 #' @rdname getFitted
2842 #' @export
2843 getFitted.gam <- function(object, ...){
2844 class(object) = "glm"
2845 out = stats::fitted(object, ...)
2846 names(out) = as.character(1:length(out))
2847 out
2848 }
2849
2850 # Check that this works
2851 # plot(fitted(fittedModelGAM), predict(fittedModelGAM, type = "response"))
2852
2853
2854 ######## lme4 ############
2855
2856
2857 #' @rdname getSimulations
2858 #' @export
2859 getSimulations.lmerMod <- function (object, nsim = 1, type = c("normal", "refit"), ...){
2860
2861 if("(weights)" %in% colnames(model.frame(object))) warning(weightsWarning)
2862 getSimulations.default(object = object, nsim = nsim, type = type, ...)
2863 }
2864
2865
2866 ######## glmmTMB ######
2867
2868 #' @rdname getRefit
2869 #' @export
2870 getRefit.glmmTMB <- function(object, newresp, ...){
2871 newData <-model.frame(object)
2872
2873 # hack to make update work - for some reason, glmmTMB wants the matrix embedded in the df for update to work ... should be solved ideally, see https://github.com/glmmTMB/glmmTMB/issues/549
2874 if(is.matrix(newresp)){
2875 tmp = colnames(newData[[1]])
2876 newData[[1]] = NULL
2877 newData = cbind(newresp, newData)
2878 colnames(newData)[1:2] = tmp
2879 } else {
2880 newData[[1]] = newresp
2881 }
2882 refittedModel = update(object, data = newData)
2883 return(refittedModel)
2884 }
2885
2886
2887 # glmmTMB simulates normal counts (and not proportions in any case, so the check for the other models is not needed), see #158
2888 # note that if observation is factor - unlike lme4 and older, glmmTMB simulates nevertheless as numeric
2889
2890 #' @rdname getSimulations
2891 #' @export
2892 getSimulations.glmmTMB <- function (object, nsim = 1, type = c("normal", "refit"), ...){
2893
2894 if("(weights)" %in% colnames(model.frame(object)) & ! family(object)$family %in% c("binomial", "betabinomial")) warning(weightsWarning)
2895
2896 type <- match.arg(type)
2897
2898 out = simulate(object, nsim = nsim, ...)
2899
2900 if (type == "normal"){
2901 if (is.matrix(out[[1]])){
2902 # this is for the k/n binomial case
2903 out = as.matrix(out)[,seq(1, (2*nsim), by = 2)]
2904 }
2905 if(!is.matrix(out)) out = data.matrix(out)
2906 }else{
2907
2908 # check for weights in k/n case
2909 if(family(object)$family %in% c("binomial", "betabinomial")){
2910 if("(weights)" %in% colnames(model.frame(object))){
2911 w = model.frame(object)
2912 w = w$`(weights)`
2913 tmp <- function(x)x/w
2914 out = apply(out, 2, tmp)
2915 out = as.data.frame(out)
2916 }
2917 else if(is.matrix(out[[1]]) & !is.matrix(model.frame(object)[[1]])){
2918 out = as.data.frame(as.matrix(out)[,seq(1, (2*nsim), by = 2)])
2919 }
2920 }
2921
2922
2923
2924
2925
2926
2927 # matrixResp =
2928 #
2929 # if(matrixResp & !is.null(ncol(newresp))){
2930 # # Hack to make the factor binomial case work
2931 # tmp = colnames(newData[[1]])
2932 # newData[[1]] = NULL
2933 # newData = cbind(newresp, newData)
2934 # colnames(newData)[1:2] = tmp
2935 # } else if(!is.null(ncol(newresp))){
2936 # newData[[1]] = newresp[,1]
2937 # } else {
2938 # newData[[1]] = newresp
2939 # }
2940
2941
2942 # if (out$modelClass == "glmmTMB" & ncol(simulations) == 2*n) simObserved = simulations[,(1+(2*(i-1))):(2+(2*(i-1)))]
2943 }
2944
2945 # else securityAssertion("Simulation results produced unsupported data structure", stop = TRUE)
2946
2947 return(out)
2948 }
2949
2950 ####### spaMM #########
2951
2952 #' @rdname getObservedResponse
2953 #' @export
2954 getObservedResponse.HLfit <- function(object, ...){
2955 out = spaMM::response(object, ...)
2956
2957 nKcase = is.matrix(out)
2958 if(nKcase){
2959 if(! (family(object) %in% c("binomial", "betabinomial"))) securityAssertion("nKcase - wrong family")
2960 if(! (ncol(out)==2)) securityAssertion("nKcase - wrong dimensions of response")
2961 out = out[,1]
2962 }
2963
2964 if(!is.numeric(out)) out = as.numeric(out)
2965
2966 return(out)
2967
2968 }
2969
2970 #' @rdname getSimulations
2971 #' @export
2972 getSimulations.HLfit <- function(object, nsim = 1, type = c("normal", "refit"), ...){
2973
2974 type <- match.arg(type)
2975
2976 capture.output({out = simulate(object, nsim = nsim, ...)})
2977
2978 if(type == "normal"){
2979 if(!is.matrix(out)) out = data.matrix(out)
2980 }else{
2981 out = as.data.frame(out)
2982 }
2983 return(out)
2984 }
2985
2986 #' @rdname getRefit
2987 #' @export
2988 getRefit.HLfit <- function(object, newresp, ...) {
2989 spaMM::update_resp(object, newresp, evaluate = TRUE)
2990 }
2991
2992 ####################### compatibility.R
2993
2994 ####################### helper.R
2995
2996 #' Modified ECDF function
2997 #'
2998 #' @details ensures symmetric ECDF (standard ECDF is <), and that 0 / 1 values are only produced if the data is strictly < > than the observed data
2999 #'
3000 #' @keywords internal
3001 DHARMa.ecdf <- function (x)
3002 {
3003 x <- sort(x)
3004 n <- length(x)
3005 if (n < 1)
3006 stop(paste("DHARMa.ecdf - length vector < 1", x))
3007 vals <- unique(x)
3008 rval <- approxfun(vals, cumsum(tabulate(match(x, vals)))/ (n +1),
3009 method = "linear", yleft = 0, yright = 1, ties = "ordered")
3010 class(rval) <- c("ecdf", "stepfun", class(rval))
3011 assign("nobs", n, envir = environment(rval))
3012 attr(rval, "call") <- sys.call()
3013 rval
3014 }
3015
3016
3017
3018 #' calculate quantiles
3019 #'
3020 #' calculates residual quantiles from a given simulation
3021 #'
3022 #' @param simulations a matrix with simulations from a fitted model. Rows = observations, columns = replicate simulations
3023 #' @param observed a vector with the observed data
3024 #' @param integerResponse is the response integer-valued. Only has an effect for method = "traditional"
3025 #' @param method the quantile randomization method used. See details
3026 #'
3027 #' @details The function calculates residual quantiles from the simulated data. For continous distributions, this will simply the the value of the ecdf.
3028 #'
3029 #' For discrete data, there are two options implemented.
3030 #'
3031 #' The current default (available since DHARMa 0.3.1) are probability integral transform (PIT-) residuals (Smith, 1985; Dunn & Smyth, 1996; see also see also Warton, et al., 2017).
3032 #'
3033 #' Before DHARMa 0.3.1, a different randomization procedure was used, in which the a U(-0.5, 0.5) distribution was added on observations and simualtions for discrete distributions. For a completely discrete distribution, the two procedures should deliver equivalent results, but the second method has the disadvantage that a) one has to know if the distribution is discrete (DHARMa tries to recognize this automatically), and b) that it leads to inefficiencies for some distributions such as the the Tweedie, which are partly continous, partly discrte (see e.g. https://github.com/florianhartig/DHARMa/issues/168).
3034 #'
3035 #' @references
3036 #'
3037 #' Smith, J. Q. "Diagnostic checks of non-standard time series models." Journal of Forecasting 4.3 (1985): 283-291.
3038 #'
3039 #' Dunn, P.K., & Smyth, G.K. (1996). Randomized quantile residuals. Journal of Computational and Graphical Statistics 5, 236-244.
3040 #'
3041 #' Warton, David I., Loïc Thibaut, and Yi Alice Wang. "The PIT-trap—A “model-free” bootstrap procedure for inference about regression models with discrete, multivariate responses." PloS one 12.7 (2017)
3042 #'
3043 #' @export
3044 getQuantile <- function(simulations, observed, integerResponse, method = c("PIT", "traditional")){
3045
3046 method = match.arg(method)
3047
3048 n = length(observed)
3049 if (nrow(simulations) != n) stop("DHARMa::getquantile: wrong dimension of simulations")
3050 nSim = ncol(simulations)
3051
3052
3053 if(method == "traditional"){
3054
3055 if(integerResponse == F){
3056
3057 if(any(duplicated(observed))) message("Model family was recognized or set as continuous, but duplicate values were detected in the response. Consider if you are fitting an appropriate model.")
3058
3059 values = as.vector(simulations)[duplicated(as.vector(simulations))]
3060 if(length(values) > 0){
3061 if (all(values%%1==0)){
3062 integerResponse = T
3063 message("Model family was recognized or set as continuous, but duplicate values were detected in the simulation - changing to integer residuals (see ?simulateResiduals for details)")
3064 } else {
3065 message("Duplicate non-integer values found in the simulation. If this is because you are fitting a non-inter valued discrete response model, note that DHARMa does not perform appropriate randomization for such cases.")
3066 }
3067
3068 }
3069 }
3070
3071 scaledResiduals = rep(NA, n)
3072 for (i in 1:n){
3073 if(integerResponse == T){
3074 scaledResiduals[i] <- DHARMa.ecdf(simulations[i,] + runif(nSim, -0.5, 0.5))(observed[i] + runif(1, -0.5, 0.5))
3075 }else{
3076 scaledResiduals[i] <- DHARMa.ecdf(simulations[i,])(observed[i])
3077 }
3078 }
3079
3080 } else {
3081
3082
3083 scaledResiduals = rep(NA, n)
3084 for (i in 1:n){
3085 min <- sum(simulations[i,] < observed[i]) / length(simulations[i,])
3086 max <- sum(simulations[i,] <= observed[i]) / length(simulations[i,])
3087 if (min == max) scaledResiduals[i] = DHARMa.ecdf(simulations[i,])(observed[i])
3088 else{
3089 scaledResiduals[i] = runif(1, min, max)
3090 }
3091 }
3092 }
3093
3094 return(scaledResiduals)
3095 }
3096
3097 #
3098 #
3099 # testData = createData(sampleSize = 200, family = gaussian(),
3100 # randomEffectVariance = 0, numGroups = 5)
3101 # fittedModel <- glmmTMB(observedResponse ~ Environment1,
3102 # data = testData)
3103 # simulationOutput <- simulateResiduals(fittedModel = fittedModel)
3104 #
3105 # sims = simulationOutput$simulatedResponse
3106 # sims[1, c(1,6,8)] = 0
3107 # any(apply(sims, 1, anyDuplicated))
3108 # getQuantile(simulations = sims, observed = testData$observedResponse, n = 200, integerResponse = F, nSim = 250)
3109 #
3110 #
3111 #
3112
3113
3114
3115 #' Check dot operator
3116 #'
3117 #' @param name variable name
3118 #' @param default variable default
3119 #'
3120 #' @details modified from https://github.com/lcolladotor/dots
3121 #'
3122 #' @keywords internal
3123 checkDots <- function(name, default, ...) {
3124 args <- list(...)
3125 if(!name %in% names(args)) {
3126 ## Default value
3127 return(default)
3128 } else {
3129 ## If the argument was defined in the ... part, return it
3130 return(args[[name]])
3131 }
3132 }
3133
3134
3135 securityAssertion <- function(context = "Not provided", stop = F){
3136 generalMessage = "Message from DHARMa: During the execution of a DHARMa function, some unexpected conditions occurred. Even if you didn't get an error, your results may not be reliable. Please check with the help if you use the functions as intended. If you think that the error is not on your side, I would be grateful if you could report the problem at https://github.com/florianhartig/DHARMa/issues \n\n Context:"
3137 if (stop == F) warning(paste(generalMessage, context))
3138 else stop(paste(generalMessage, context))
3139 }
3140
3141 ####################### helper.R
3142
3143 ####################### plot.R
3144
3145 #' DHARMa standard residual plots
3146 #'
3147 #' This function creates standard plots for the simulated residuals
3148 #' @param x an object with simulated residuals created by \code{\link{simulateResiduals}}
3149 #' @param rank if T (default), the values of pred will be rank transformed. This will usually make patterns easier to spot visually, especially if the distribution of the predictor is skewed.
3150 #' @param ... further options for \code{\link{plotResiduals}}. Consider in particular parameters quantreg, rank and asFactor. xlab, ylab and main cannot be changed when using plotSimulatedResiduals, but can be changed when using plotResiduals.
3151 #' @details The function creates two plots. To the left, a qq-uniform plot to detect deviations from overall uniformity of the residuals (calling \code{\link{plotQQunif}}), and to the right, a plot of residuals against predicted values (calling \code{\link{plotResiduals}}). Outliers are highlighted in red (for more on outliers, see \code{\link{testOutliers}}). For a correctly specified model, we would expect
3152 #'
3153 #' a) a straight 1-1 line in the uniform qq-plot -> evidence for an overall uniform (flat) distribution of the residuals
3154 #'
3155 #' b) uniformity of residuals in the vertical direction in the res against predictor plot
3156 #'
3157 #' Deviations of this can be interpreted as for a linear regression. See the vignette for detailed examples.
3158 #'
3159 #' To provide a visual aid in detecting deviations from uniformity in y-direction, the plot of the residuals against the predicted values also performs an (optional) quantile regression, which provides 0.25, 0.5 and 0.75 quantile lines across the plots. These lines should be straight, horizontal, and at y-values of 0.25, 0.5 and 0.75. Note, however, that some deviations from this are to be expected by chance, even for a perfect model, especially if the sample size is small. See further comments on this plot, its interpretation and options, in \code{\link{plotResiduals}}
3160 #'
3161 #' The quantile regression can take some time to calculate, especially for larger datasets. For that reason, quantreg = F can be set to produce a smooth spline instead. This is default for n > 2000.
3162 #'
3163 #' @seealso \code{\link{plotResiduals}}, \code{\link{plotQQunif}}
3164 #' @example inst/examples/plotsHelp.R
3165 #' @import graphics
3166 #' @import utils
3167 #' @export
3168 plot.DHARMa <- function(x, rank = TRUE, ...){
3169
3170 oldpar <- par(mfrow = c(1,2), oma = c(0,1,2,1))
3171 on.exit(par(oldpar))
3172
3173 plotQQunif(x)
3174 plotResiduals(x, rank = rank, ...)
3175
3176 mtext("DHARMa residual diagnostics", outer = T)
3177 }
3178
3179
3180 #' Histogram of DHARMa residuals
3181 #'
3182 #' The function produces a histogram from a DHARMa output
3183 #'
3184 #' @param x a DHARMa simulation output (class DHARMa)
3185 #' @param breaks breaks for hist() function
3186 #' @param col col for hist bars
3187 #' @param main plot main
3188 #' @param xlab plot xlab
3189 #' @param cex.main plot cex.main
3190 #' @param ... other arguments to be passed on to hist
3191 #' @seealso \code{\link{plotSimulatedResiduals}}, \code{\link{plotResiduals}}
3192 #' @example inst/examples/plotsHelp.R
3193 #' @export
3194 hist.DHARMa <- function(x,
3195 breaks = seq(-0.02, 1.02, len = 53),
3196 col = c("red",rep("lightgrey",50), "red"),
3197 main = "Hist of DHARMa residuals",
3198 xlab = "Residuals (outliers are marked red)",
3199 cex.main = 1,
3200 ...){
3201
3202 x = ensureDHARMa(x, convert = T)
3203
3204 val = x$scaledResiduals
3205 val[val == 0] = -0.01
3206 val[val == 1] = 1.01
3207
3208 hist(val, breaks = breaks, col = col, main = main, xlab = xlab, cex.main = cex.main, ...)
3209 }
3210
3211
3212 #' DHARMa standard residual plots
3213 #'
3214 #' DEPRECATED, use plot() instead
3215 #'
3216 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}}
3217 #' @param ... further options for \code{\link{plotResiduals}}. Consider in particular parameters quantreg, rank and asFactor. xlab, ylab and main cannot be changed when using plotSimulatedResiduals, but can be changed when using plotResiduals.
3218 #' @note This function is deprecated. Use \code{\link{plot.DHARMa}}
3219 #'
3220 #' @seealso \code{\link{plotResiduals}}, \code{\link{plotQQunif}}
3221 #' @export
3222 plotSimulatedResiduals <- function(simulationOutput, ...){
3223 message("plotSimulatedResiduals is deprecated, please switch your code to simply using the plot() function")
3224 plot(simulationOutput, ...)
3225 }
3226
3227
3228 #' Quantile-quantile plot for a uniform distribution
3229 #'
3230 #' The function produces a uniform quantile-quantile plot from a DHARMa output
3231 #'
3232 #' @param simulationOutput a DHARMa simulation output (class DHARMa)
3233 #' @param testUniformity if T, the function \code{\link{testUniformity}} will be called and the result will be added to the plot
3234 #' @param testOutliers if T, the function \code{\link{testOutliers}} will be called and the result will be added to the plot
3235 #' @param testDispersion if T, the function \code{\link{testDispersion}} will be called and the result will be added to the plot
3236 #' @param ... arguments to be passed on to \code{\link[gap]{qqunif}}
3237 #'
3238 #' @details the function calls qqunif from the R package gap to create a quantile-quantile plot for a uniform distribution.
3239 #' @seealso \code{\link{plotSimulatedResiduals}}, \code{\link{plotResiduals}}
3240 #' @example inst/examples/plotsHelp.R
3241 #' @export
3242 plotQQunif <- function(simulationOutput, testUniformity = T, testOutliers = T, testDispersion = T, ...){
3243
3244 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model")
3245
3246 gap::qqunif(simulationOutput$scaledResiduals,pch=2,bty="n", logscale = F, col = "black", cex = 0.6, main = "QQ plot residuals", cex.main = 1, ...)
3247
3248 if(testUniformity == TRUE){
3249 temp = testUniformity(simulationOutput, plot = F)
3250 legend("topleft", c(paste("KS test: p=", round(temp$p.value, digits = 5)), paste("Deviation ", ifelse(temp$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(temp$p.value < 0.05, "red", "black" ), bty="n")
3251 }
3252
3253 if(testOutliers == TRUE){
3254 temp = testOutliers(simulationOutput, plot = F)
3255 legend("bottomright", c(paste("Outlier test: p=", round(temp$p.value, digits = 5)), paste("Deviation ", ifelse(temp$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(temp$p.value < 0.05, "red", "black" ), bty="n")
3256 }
3257
3258 if(testDispersion == TRUE){
3259 temp = testDispersion(simulationOutput, plot = F)
3260 legend("center", c(paste("Dispersion test: p=", round(temp$p.value, digits = 5)), paste("Deviation ", ifelse(temp$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(temp$p.value < 0.05, "red", "black" ), bty="n")
3261 }
3262
3263 }
3264
3265
3266 #' Generic res ~ pred scatter plot with spline or quantile regression on top
3267 #'
3268 #' The function creates a generic residual plot with either spline or quantile regression to highlight patterns in the residuals. Outliers are highlighted in red.
3269 #'
3270 #' @param simulationOutput on object, usually a DHARMa object, from which residual values can be extracted. Alternatively, a vector with residuals or a fitted model can be provided, which will then be transformed into a DHARMa object.
3271 #' @param form optional predictor against which the residuals should be plotted. Default is to used the predicted(simulationOutput)
3272 #' @param quantreg whether to perform a quantile regression on 0.25, 0.5, 0.75 on the residuals. If F, a spline will be created instead. Default NULL chooses T for nObs < 2000, and F otherwise.
3273 #' @param rank if T, the values provided in form will be rank transformed. This will usually make patterns easier to spot visually, especially if the distribution of the predictor is skewed. If form is a factor, this has no effect.
3274 #' @param asFactor should a numeric predictor provided in form be treated as a factor. Default is to choose this for < 10 unique values, as long as enough predictions are available to draw a boxplot.
3275 #' @param smoothScatter if T, a smooth scatter plot will plotted instead of a normal scatter plot. This makes sense when the number of residuals is very large. Default NULL chooses T for nObs < 10000, and F otherwise.
3276 #' @param quantiles for a quantile regression, which quantiles should be plotted
3277 #' @param ... additional arguments to plot / boxplot.
3278 #' @details The function plots residuals against a predictor (by default against the fitted value, extracted from the DHARMa object, or any other predictor).
3279 #'
3280 #' Outliers are highlighted in red (for information on definition and interpretation of outliers, see \code{\link{testOutliers}}).
3281 #'
3282 #' To provide a visual aid in detecting deviations from uniformity in y-direction, the plot function calculates an (optional) quantile regression, which compares the empirical 0.25, 0.5 and 0.75 quantiles (default) in y direction (red solid lines) with the theoretical 0.25, 0.5 and 0.75 quantiles (dashed black line).
3283 #'
3284 #' Asymptotically (i.e. for lots of data / residuals), if the model is correct, theoretical and the empirical quantiles should be identical (i.e. dashed and solid lines should match). A p-value for the deviation is calculated for each quantile line. Significant deviations are highlighted by red color.
3285 #'
3286 #' If form is a factor, a boxplot will be plotted instead of a scatter plot. The distribution for each factor level should be uniformly distributed, so the box should go from 0.25 to 0.75, with the median line at 0.5. Again, chance deviations from this will increases when the sample size is smaller. You can run null simulations to test if the deviations you see exceed what you would expect from random variation. If you want to create box plots for categorical predictors (e.g. because you only have a small number of unique numeric predictor values), you can convert your predictor with as.factor(pred)
3287 #'
3288 #' @return if quantile tests are performed, the function returns them invisibly.
3289 #'
3290 #' @note The quantile regression can take some time to calculate, especially for larger datasets. For that reason, quantreg = F can be set to produce a smooth spline instead.
3291 #'
3292 #' @seealso \code{\link{plotQQunif}}
3293 #' @example inst/examples/plotsHelp.R
3294 #' @export
3295 plotResiduals <- function(simulationOutput, form = NULL, quantreg = NULL, rank = F, asFactor = NULL, smoothScatter = NULL, quantiles = c(0.25, 0.5, 0.75), ...){
3296
3297
3298 ##### Checks #####
3299
3300
3301 a <- list(...)
3302 a$ylab = checkDots("ylab", "Standardized residual", ...)
3303 if(is.null(form)){
3304 a$xlab = checkDots("xlab", ifelse(rank, "Model predictions (rank transformed)", "Model predictions"), ...)
3305 }
3306
3307 simulationOutput = ensureDHARMa(simulationOutput, convert = T)
3308 res = simulationOutput$scaledResiduals
3309 if(inherits(form, "DHARMa"))stop("DHARMa::plotResiduals > argument form cannot be of class DHARMa. Note that the syntax of plotResiduals has changed since DHARMa 0.3.0. See ?plotResiduals.")
3310
3311 pred = ensurePredictor(simulationOutput, form)
3312
3313 ##### Rank transform and factor conversion#####
3314
3315 if(!is.factor(pred)){
3316
3317 if (rank == T){
3318 pred = rank(pred, ties.method = "average")
3319 pred = pred / max(pred)
3320 }
3321
3322 nuniq = length(unique(pred))
3323 ndata = length(pred)
3324 if(is.null(asFactor)) asFactor = (nuniq == 1) | (nuniq < 10 & ndata / nuniq > 10)
3325 if (asFactor) pred = factor(pred)
3326 }
3327
3328 ##### Residual scatter plots #####
3329
3330 if(is.null(quantreg)) if (length(res) > 2000) quantreg = FALSE else quantreg = TRUE
3331
3332 switchScatter = 10000
3333 if(is.null(smoothScatter)) if (length(res) > switchScatter) smoothScatter = TRUE else smoothScatter = FALSE
3334
3335 blackcol = rgb(0,0,0, alpha = max(0.1, 1 - 3 * length(res) / switchScatter))
3336
3337
3338 # categorical plot
3339 if(is.factor(pred)){
3340 do.call(plot, append(list(res ~ pred, ylim = c(0,1), axes = FALSE), a))
3341 }
3342 # smooth scatter
3343 else if (smoothScatter == TRUE) {
3344 defaultCol = ifelse(res == 0 | res == 1, 2,blackcol)
3345 do.call(graphics::smoothScatter, append(list(x = pred, y = res , ylim = c(0,1), axes = FALSE, colramp = colorRampPalette(c("white", "darkgrey"))),a))
3346 points(pred[defaultCol == 2], res[defaultCol == 2], col = "red", cex = 0.5)
3347 }
3348 # normal plot
3349 else{
3350 defaultCol = ifelse(res == 0 | res == 1, 2,blackcol)
3351 defaultPch = ifelse(res == 0 | res == 1, 8,1)
3352 a$col = checkDots("col", defaultCol, ...)
3353 a$pch = checkDots("pch", defaultPch, ...)
3354 do.call(plot, append(list(res ~ pred, ylim = c(0,1), axes = FALSE), a))
3355 }
3356
3357 axis(1)
3358 axis(2, at=c(0, 0.25, 0.5, 0.75, 1))
3359
3360 ##### Quantile regressions #####
3361
3362 main = checkDots("main", "Residual vs. predicted", ...)
3363 out = NULL
3364
3365 if(is.numeric(pred)){
3366 if(quantreg == F){
3367 title(main = main, cex.main = 1)
3368 abline(h = c(0.25, 0.5, 0.75), col = "black", lwd = 0.5, lty = 2)
3369 try({
3370 lines(smooth.spline(pred, res, df = 10), lty = 2, lwd = 2, col = "red")
3371 abline(h = 0.5, col = "red", lwd = 2)
3372 }, silent = T)
3373 }else{
3374
3375 out = testQuantiles(simulationOutput, pred, quantiles = quantiles, plot = F)
3376
3377
3378 if(any(out$pvals < 0.05, na.rm = TRUE)){
3379 main = paste(main, "Quantile deviations detected (red curves)", sep ="\n")
3380 if(out$p.value <= 0.05){
3381 main = paste(main, "Combined adjusted quantile test significant", sep ="\n")
3382 } else {
3383 main = paste(main, "Combined adjusted quantile test n.s.", sep ="\n")
3384 }
3385 maincol = "red"
3386 } else {
3387 main = paste(main, "No significant problems detected", sep ="\n")
3388 maincol = "black"
3389 }
3390
3391
3392 title(main = main, cex.main = 0.8,
3393 col.main = maincol)
3394
3395 for(i in 1:length(quantiles)){
3396
3397 lineCol = ifelse(out$pvals[i] <= 0.05 & !(is.na(out$pvals[i])), "red", "black")
3398 filCol = ifelse(out$pvals[i] <= 0.05 & !(is.na(out$pvals[i])), "#FF000040", "#00000020")
3399
3400 abline(h = quantiles[i], col = lineCol, lwd = 0.5, lty = 2)
3401 polygon(c(out$predictions$pred, rev(out$predictions$pred)),
3402 c(out$predictions[,2*i] - out$predictions[,2*i+1], rev(out$predictions[,2*i] + out$predictions[,2*i+1])),
3403 col = "#00000020", border = F)
3404 lines(out$predictions$pred, out$predictions[,2*i], col = lineCol, lwd = 2)
3405 }
3406
3407 # legend("bottomright", c(paste("Quantile test: p=", round(out$p.value, digits = 5)), paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(out$p.value < 0.05, "red", "black" ), bty="n")
3408
3409 }
3410 }
3411 invisible(out)
3412 }
3413
3414 x = 0.01
3415 x <= 0.05 & !(is.na(x))
3416
3417
3418 #' Ensures the existence of a valid predictor to plot residuals against
3419 #'
3420 #' @param simulationOutput a DHARMa simulation output or an object that can be converted into a DHARMa simulation output
3421 #' @param predictor an optional predictor. If no predictor is provided, will try to extract the fitted value
3422 #' @keywords internal
3423 ensurePredictor <- function(simulationOutput,
3424 predictor = NULL){
3425 if(!is.null(predictor)){
3426
3427 if(length(predictor) != length(simulationOutput$scaledResiduals)) stop("DHARMa: residuals and predictor do not have the same length. The issue is possibly that you have NAs in your predictor that were removed during the model fit. Remove the NA values from your predictor.")
3428 } else {
3429
3430 predictor = simulationOutput$fittedPredictedResponse
3431 if(is.null(predictor)) stop("DHARMa: can't extract predictor from simulationOutput, and no predictor provided")
3432 }
3433 return(predictor)
3434 }
3435
3436
3437
3438
3439 #plot(simulationOutput)
3440
3441 #plot(simulationOutput$observedResponse, simulationOutput$scaledResiduals, xlab = "predicted", ylab = "Residual", main = "Residual vs. predicted")
3442
3443 #plot(simulationOutput$observedResponse, simulationOutput$fittedPredictedResponse - simulationOutput$observedResponse)
3444
3445 #plot(cumsum(sort(simulationOutput$scaledResiduals)))
3446
3447
3448 #plotConventionalResiduals(fittedModel)
3449
3450
3451 #' Conventional residual plot
3452 #'
3453 #' Convenience function to draw conventional residual plots
3454 #'
3455 #' @param fittedModel a fitted model object
3456 #' @export
3457 plotConventionalResiduals <- function(fittedModel){
3458 opar <- par(mfrow = c(1,3), oma = c(0,1,2,1))
3459 on.exit(par(opar))
3460 plot(predict(fittedModel), resid(fittedModel, type = "deviance"), main = "Deviance" , ylab = "Residual", xlab = "Predicted")
3461 plot(predict(fittedModel), resid(fittedModel, type = "pearson") , main = "Pearson", ylab = "Residual", xlab = "Predicted")
3462 plot(predict(fittedModel), resid(fittedModel, type = "response") , main = "Raw residuals" , ylab = "Residual", xlab = "Predicted")
3463 mtext("Conventional residual plots", outer = T)
3464 }
3465
3466
3467
3468
3469 #
3470 #
3471 # if(quantreg == F){
3472 #
3473 # lines(smooth.spline(simulationOutput$fittedPredictedResponse, simulationOutput$scaledResiduals, df = 10), lty = 2, lwd = 2, col = "red")
3474 #
3475 # abline(h = 0.5, col = "red", lwd = 2)
3476 #
3477 # }else{
3478 #
3479 # #library(gamlss)
3480 #
3481 # # qrnn
3482 #
3483 # # http://r.789695.n4.nabble.com/Quantile-GAM-td894280.html
3484 #
3485 # #require(quantreg)
3486 # #dat <- plyr::arrange(dat,pred)
3487 # #fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.5,data = dat)
3488 #
3489 # probs = c(0.25, 0.50, 0.75)
3490 #
3491 # w <- p <- list()
3492 # for(i in seq_along(probs)){
3493 # capture.output(w[[i]] <- qrnn::qrnn.fit(x = as.matrix(simulationOutput$fittedPredictedResponse), y = as.matrix(simulationOutput$scaledResiduals), n.hidden = 4, tau = probs[i], iter.max = 1000, n.trials = 1, penalty = 1))
3494 # p[[i]] <- qrnn::qrnn.predict(as.matrix(sort(simulationOutput$fittedPredictedResponse)), w[[i]])
3495 # }
3496 #
3497 #
3498 #
3499 # #plot(simulationOutput$fittedPredictedResponse, simulationOutput$scaledResiduals, xlab = "Predicted", ylab = "Residual", main = "Residual vs. predicted\n lines should match", cex.main = 1)
3500 #
3501 # #lines(sort(simulationOutput$fittedPredictedResponse), as.vector(p[[1]]), col = "red")
3502 #
3503 # matlines(sort(simulationOutput$fittedPredictedResponse), matrix(unlist(p), nrow = length(simulationOutput$fittedPredictedResponse), ncol = length(p)), col = "red", lty = 1)
3504 #
3505 # # as.vector(p[[1]])
3506 # #
3507 # #
3508 # # lines(simulationOutput$fittedPredictedResponse,p[[1]], col = "red", lwd = 2)
3509 # # abline(h = 0.5, col = "red", lwd = 2)
3510 # #
3511 # # fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.25,data = dat)
3512 # # lines(unique(dat$pred)[-1],fit$coef[1] + fit$coef[-1], col = "green", lwd = 2, lty =2)
3513 # # abline(h = 0.25, col = "green", lwd = 2, lty =2)
3514 # #
3515 # # fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.75,data = dat)
3516 # # lines(unique(dat$pred)[-1],fit$coef[1] + fit$coef[-1], col = "blue", lwd = 2, lty = 2)
3517 # # abline(h = 0.75, col = "blue", lwd = 2, lty =2)
3518 # }
3519
3520 ####################### plot.R
3521
3522 ####################### random.R
3523
3524 #' Record and restore a random state
3525 #'
3526 #' The aim of this function is to record, manipualate and restor a random state
3527 #'
3528 #' @details This function is intended for two (not mutually exclusive tasks)
3529 #'
3530 #' a) record the current random state
3531 #'
3532 #' b) change the current random state in a way that the previous state can be restored
3533 #'
3534 #' @return a list with various infos about the random state that after function execution, as well as a function to restore the previous state before the function execution
3535 #'
3536 #' @param seed seed argument to set.seed(). NULL = no seed, but random state will be restored. F = random state will not be restored
3537 #' @export
3538 #' @example inst/examples/getRandomStateHelp.R
3539 #' @author Florian Hartig
3540 #'
3541 getRandomState <- function(seed = NULL){
3542
3543 # better to explicitly access the global RS?
3544 # current = get(".Random.seed", .GlobalEnv, ifnotfound = NULL)
3545
3546 current = mget(".Random.seed", envir = .GlobalEnv, ifnotfound = list(NULL))[[1]]
3547
3548 if(is.logical(seed) & seed == F){
3549 restoreCurrent <- function(){}
3550 }else{
3551 restoreCurrent <- function(){
3552 if(is.null(current)) rm(".Random.seed", envir = .GlobalEnv) else assign(".Random.seed", current , envir = .GlobalEnv)
3553 }
3554 }
3555
3556 # setting seed
3557 if(is.numeric(seed)) set.seed(seed)
3558
3559 # ensuring that RNG has been initialized
3560 if (is.null(current))runif(1)
3561
3562 randomState = list(seed, state = get(".Random.seed", globalenv()), kind = RNGkind(), restoreCurrent = restoreCurrent)
3563 return(randomState)
3564 }
3565
3566 ####################### random.R
3567
3568 ######################################### Package DHARMa