comparison FunctExeCalcGLMSpGalaxy.r @ 0:0778efa9eb2e draft

"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 07f1028cc764f920b1e6419c151f04ab4e3600fa"
author ecology
date Tue, 21 Jul 2020 06:00:51 -0400
parents
children 6c14021f678e
comparison
equal deleted inserted replaced
-1:000000000000 0:0778efa9eb2e
1 #Rscript
2
3 #####################################################################################################################
4 #####################################################################################################################
5 ################################# Compute a Generalized Linear Model from your data #################################
6 #####################################################################################################################
7 #####################################################################################################################
8
9 ###################### Packages
10 #suppressMessages(library(MASS))
11 suppressMessages(library(multcomp))
12 suppressMessages(library(glmmTMB)) ###Version: 0.2.3
13 suppressMessages(library(gap))
14
15 ###################### Load arguments and declaring variables
16
17 args = commandArgs(trailingOnly=TRUE)
18 #options(encoding = "UTF-8")
19
20 if (length(args) < 10) {
21 stop("At least 4 arguments must be supplied : \n- two input dataset files (.tabular) : metrics table and unitobs table \n- Interest variable field from metrics table \n- Response variable from unitobs table.", call.=FALSE) #si pas d'arguments -> affiche erreur et quitte / if no args -> error and exit1
22
23 } else {
24 Importdata <- args[1] ###### file name : metrics table
25 ImportUnitobs <- args[2] ###### file name : unitobs informations
26 colmetric <- as.numeric(args[3]) ###### Selected interest metric for GLM
27 listFact <- strsplit(args [4],",")[[1]] ###### Selected response factors for GLM
28 listRand <- strsplit(args [5],",")[[1]] ###### Selected randomized response factors for GLM
29 colFactAna <- args[6] ####### (optional) Selected splitting factors for GLMs
30 Distrib <- args[7] ###### (optional) Selected distribution for GLM
31 log <- args[8] ###### (Optional) Log on interest metric ?
32 aggreg <- args[9] ###### Aggregation level of the data table
33 source(args[10]) ###### Import functions
34
35 }
36 #### Data must be a dataframe with at least 3 variables : unitobs representing location and year ("observation.unit"), species code ("species.code") and abundance ("number")
37
38
39 #Import des données / Import data
40 obs<- read.table(Importdata,sep="\t",dec=".",header=TRUE,encoding="UTF-8") #
41 obs[obs == -999] <- NA
42 metric <- colnames(obs)[colmetric]
43 tabUnitobs <- read.table(ImportUnitobs,sep="\t",dec=".",header=TRUE,encoding="UTF-8")
44 tabUnitobs[tabUnitobs == -999] <- NA
45
46 vars_data1<- c("species.code")
47 err_msg_data1<-"The input metrics dataset doesn't have the right format. It needs to have at least the following 3 variables :\n- species.code \n- observation.unit (or year and site)\n- numeric or integer metric\n"
48 check_file(obs,err_msg_data1,vars_data1,3)
49
50 vars_data2 <- c(listFact,listRand)
51 vars_data2 <- vars_data2[vars_data2 != "None"]
52 err_msg_data2<-"The input unitobs dataset doesn't have the right format. It needs to have at least the following 2 variables :\n- observation.unit (or year and site)\n- factors used in GLM (habitat, year and/or site)\n"
53 check_file(tabUnitobs,err_msg_data2,vars_data2,2)
54
55
56 if (colFactAna != "None")
57 {
58 FactAna <- colFactAna
59 if (class(obs[FactAna]) == "numeric" || FactAna == "observation.unit"){stop("Wrong chosen separation factor : Analysis can't be separated by observation unit or numeric factor")}
60 }else{
61 FactAna <- colFactAna
62 }
63
64
65 #factors <- fact.det.f(Obs=obs)
66
67 ####################################################################################################
68 ########## Computing Generalized Linear Model ## Function : modeleLineaireWP2.unitobs.f ############
69 ####################################################################################################
70
71 modeleLineaireWP2.species.f <- function(metrique, listFact, listRand, FactAna, Distrib, log=FALSE, tabMetrics, tableMetrique, tabUnitobs, unitobs="observation.unit", outresiduals = FALSE, nbName="number")
72 {
73 ## Purpose: Gestions des différentes étapes des modèles linéaires.
74 ## ----------------------------------------------------------------------
75 ## Arguments: metrique : la métrique choisie.
76 ## factAna : le facteur de séparation des graphiques.
77 ## factAnaSel : la sélection de modalités pour ce dernier
78 ## listFact : liste du (des) facteur(s) de regroupement
79 ## listFactSel : liste des modalités sélectionnées pour ce(s)
80 ## dernier(s)
81 ## tabMetrics : table de métriques.
82 ## tableMetrique : nom de la table de métriques.
83 ## dataEnv : environnement de stockage des données.
84 ## baseEnv : environnement de l'interface.
85 ## ----------------------------------------------------------------------
86 ## Author: Yves Reecht, Date: 18 août 2010, 15:59
87
88 tmpData <- tabMetrics
89
90 if (listRand[1] != "None")
91 {
92 if (all(is.element(listFact,listRand)) || listFact[1] == "None")
93 {
94 RespFact <- paste("(1|",paste(listRand,collapse=") + (1|"),")")
95 listF <- NULL
96 listFact <- listRand
97 }else{
98 listF <- listFact[!is.element(listFact,listRand)]
99 RespFact <- paste(paste(listF, collapse=" + ")," + (1|",paste(listRand,collapse=") + (1|"),")")
100 listFact <- c(listF,listRand)
101 }
102 }else{
103 listF <- listFact
104 RespFact <- paste(listFact, collapse=" + ")
105 }
106 ##Creating model's expression :
107 #if (log == FALSE) {
108 exprML <- eval(parse(text=paste(metrique, "~", RespFact)))
109 #}else{
110 # exprML <- eval(parse(text=paste("log(",metrique,")", "~", RespFact)))
111 #}
112
113 ##Creating analysis table :
114 listFactTab <- c(listFact,FactAna)
115 listFactTab <- listFactTab[listFactTab != "None"]
116
117 if (all(is.na(match(tmpData[,unitobs],tabUnitobs[,unitobs])))) {stop("Observation units doesn't match in the two input tables")}
118
119 if(is.element("species.code",colnames(tmpData)))
120 {
121 col <- c(unitobs,metrique,FactAna)
122 tmpData <- cbind(tmpData[,col], tabUnitobs[match(tmpData[,unitobs],tabUnitobs[,unitobs]),listFact])
123 colnames(tmpData) <- c(col,listFact)
124
125 for (i in listFactTab) {
126 tmpData[,i] <- as.factor(tmpData[,i])
127 }
128 }else{
129 stop("Warning : wrong data frame, data frame should be aggregated by observation unit (year and site) and species")
130 }
131
132 ## Suppression des 'levels' non utilisés :
133 tmpData <- dropLevels.f(tmpData)
134
135 ## Aide au choix du type d'analyse :
136 if (Distrib == "None")
137 {
138 if (metrique == "pres.abs")
139 {
140 loiChoisie <- "binomial"
141 }else{
142 switch(class(tmpData[,metrique]),
143 "integer"={loiChoisie <- "poisson"},
144 "numeric"={loiChoisie <- "gaussian"},
145 stop("Selected metric class doesn't fit, you should select an integer or a numeric variable"))
146 }
147 }else{
148 loiChoisie <- Distrib
149 }
150
151 ##Create results table :
152 lev <- unlist(lapply(listF,FUN=function(x){levels(tmpData[,x])}))
153
154 if (listRand[1] != "None") ## if random effects
155 {
156 TabSum <- data.frame(species=levels(tmpData[,FactAna]),AIC=NA,BIC=NA,logLik=NA, deviance=NA,df.resid=NA)
157 colrand <- unlist(lapply(listRand,
158 FUN=function(x){lapply(c("Std.Dev","NbObservation","NbLevels"),
159 FUN=function(y){paste(x,y,collapse = ":")
160 })
161 }))
162 TabSum[,colrand] <- NA
163
164 if (! is.null(lev)) ## if fixed effects + random effects
165 {
166 colcoef <- unlist(lapply(c("(Intercept)",lev),
167 FUN=function(x){lapply(c("Estimate","Std.Err","Zvalue","Pvalue","signif"),
168 FUN=function(y){paste(x,y,collapse = ":")
169 })
170 }))
171 }else{ ## if no fixed effects
172 colcoef <- NULL
173 }
174
175 }else{ ## if no random effects
176 TabSum <- data.frame(species=levels(tmpData[,FactAna]),AIC=NA,Resid.deviance=NA,df.resid=NA,Null.deviance=NA,df.null=NA)
177
178 switch(loiChoisie,
179 "gaussian"={colcoef <- unlist(lapply(c("(Intercept)",lev),
180 FUN=function(x){lapply(c("Estimate","Std.Err","Tvalue","Pvalue","signif"),
181 FUN=function(y){paste(x,y,collapse = ":")
182 })
183 }))},
184 "quasipoisson"={colcoef <- unlist(lapply(c("(Intercept)",lev),
185 FUN=function(x){lapply(c("Estimate","Std.Err","Tvalue","Pvalue","signif"),
186 FUN=function(y){paste(x,y,collapse = ":")
187 })
188 }))},
189 colcoef <- unlist(lapply(c("(Intercept)",lev),
190 FUN=function(x){lapply(c("Estimate","Std.Err","Zvalue","Pvalue","signif"),
191 FUN=function(y){paste(x,y,collapse = ":")
192 })
193 })))
194
195 }
196
197 TabSum[,colcoef] <- NA
198
199 ### creating rate table
200 TabRate <- data.frame(species=levels(tmpData[,FactAna]), complete_plan=NA, balanced_plan=NA, NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA)
201
202 ## Compute Model(s) :
203
204 for (sp in levels(tmpData[,FactAna]))
205 {
206 cutData <- tmpData[grep(sp,tmpData[,FactAna]),]
207 cutData <- dropLevels.f(cutData)
208
209 res <-""
210
211 if (listRand[1] != "None")
212 {
213 res <- tryCatch(glmmTMB(exprML,family=loiChoisie, data=cutData), error=function(e){})
214 }else{
215 res <- tryCatch(glm(exprML,data=cutData,family=loiChoisie), error=function(e){})
216 }
217
218 ## Écriture des résultats formatés dans un fichier :
219 if (! is.null(res))
220 {
221 TabSum <- sortiesLM.f(objLM=res, TabSum=TabSum, factAna=factAna, cut=sp, colAna="species", lev=lev, Data=cutData, metrique=metrique, type="espece", listFact=listFact)
222
223 TabRate[TabRate[,"species"]==sp,c(2:11)] <- noteGLM.f(data=cutData, objLM=res, metric=metrique, listFact=listFact, details=TRUE)
224
225 }else{
226 cat("\nCannot compute GLM for species",sp,"Check if one or more factor(s) have only one level, or try with another distribution for the model in advanced settings \n\n")
227 }
228
229 }
230 noteGLMs.f(tabRate=TabRate,exprML=exprML,objLM=res,file_out=TRUE)
231
232 ## simple statistics and infos :
233 filename <- "GLMSummaryFull.txt"
234
235 ## Save data on model :
236
237 infoStats.f(filename=filename, Data=tmpData, agregLevel=aggreg, type="stat",
238 metrique=metrique, factGraph=factAna, #factGraphSel=modSel,
239 listFact=listFact)#, listFactSel=listFactSel)
240
241 return(TabSum)
242 }
243
244 ################# Analysis
245
246 Tab <- modeleLineaireWP2.species.f(metrique=metric, listFact=listFact, listRand=listRand, FactAna=FactAna, Distrib=Distrib, tabMetrics=obs, tableMetrique=aggreg, tabUnitobs=tabUnitobs, outresiduals=SupprOutlay, nbName="number")
247
248 write.table(Tab,"GLMSummary.tabular", row.names=FALSE, sep="\t", dec=".",fileEncoding="UTF-8")
249