Mercurial > repos > ecology > pampa_glmcomm
diff FunctExeCalcGLMGalaxy.r @ 0:f0dc3958e65d draft
"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 07f1028cc764f920b1e6419c151f04ab4e3600fa"
author | ecology |
---|---|
date | Tue, 21 Jul 2020 06:00:31 -0400 |
parents | |
children | 44b9069775ca |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FunctExeCalcGLMGalaxy.r Tue Jul 21 06:00:31 2020 -0400 @@ -0,0 +1,277 @@ +#Rscript + +##################################################################################################################### +##################################################################################################################### +################################# Compute a Generalized Linear Model from your data ################################# +##################################################################################################################### +##################################################################################################################### + +###################### Packages +suppressMessages(library(multcomp)) +suppressMessages(library(glmmTMB)) ###Version: 0.2.3 +suppressMessages(library(gap)) + +###################### Load arguments and declaring variables + +args = commandArgs(trailingOnly=TRUE) +#options(encoding = "UTF-8") + +if (length(args) < 10) { + stop("At least 4 arguments must be supplied : \n- two input dataset files (.tabular) : metrics table and unitobs table \n- Interest variable field from metrics table \n- Response variable from unitobs table.", call.=FALSE) #si pas d'arguments -> affiche erreur et quitte / if no args -> error and exit1 + +} else { + Importdata <- args[1] ###### file name : metrics table + ImportUnitobs <- args[2] ###### file name : unitobs informations + colmetric <- as.numeric(args[3]) ###### Selected interest metric for GLM + listFact <- strsplit(args [4],",")[[1]] ###### Selected response factors for GLM + listRand <- strsplit(args [5],",")[[1]] ###### Selected randomized response factors for GLM + colFactAna <- args[6] ####### (optional) Selected splitting factors for GLMs + Distrib <- args[7] ###### (optional) Selected distribution for GLM + log <- args[8] ###### (Optional) Log on interest metric ? + aggreg <- args[9] ###### Aggregation level of the data table + source(args[10]) ###### Import functions + +} +#### Data must be a dataframe with at least 3 variables : unitobs representing location and year ("observation.unit"), species code ("species.code") and abundance ("number") + + +#Import des données / Import data +obs<- read.table(Importdata,sep="\t",dec=".",header=TRUE,encoding="UTF-8") # +obs[obs == -999] <- NA +metric <- colnames(obs)[colmetric] +tabUnitobs <- read.table(ImportUnitobs,sep="\t",dec=".",header=TRUE,encoding="UTF-8") +tabUnitobs[tabUnitobs == -999] <- NA + +if (colFactAna != "None") +{ + FactAna <- colnames(tabUnitobs)[as.numeric(colFactAna)] + if (class(tabUnitobs[FactAna]) == "numeric" || FactAna == "observation.unit"){stop("Wrong chosen separation factor : Analysis can't be separated by observation unit or numeric factor")} +}else{ + FactAna <- colFactAna +} + + +#factors <- fact.det.f(Obs=obs) + +vars_data1<- NULL +err_msg_data1<-"The input metrics dataset doesn't have the right format. It needs to have at least the following 2 variables :\n- observation.unit (or year and site)\n- numeric or integer metric\n" +check_file(obs,err_msg_data1,vars_data1,2) + +vars_data2 <- c(listFact,listRand) +err_msg_data2<-"The input unitobs dataset doesn't have the right format. It needs to have at least the following 2 variables :\n- observation.unit (or year and site)\n- factors used in GLM (habitat, year and/or site)\n" +check_file(tabUnitobs,err_msg_data2,vars_data2[vars_data2 != "None"],2) + +#################################################################################################### +########## Computing Generalized Linear Model ## Function : modeleLineaireWP2.unitobs.f ############ +#################################################################################################### + +modeleLineaireWP2.unitobs.f <- function(metrique, listFact, listRand, FactAna, Distrib, log=FALSE, tabMetrics, tableMetrique, tabUnitobs, unitobs="observation.unit", nbName="number") +{ + ## Purpose: Monitoring steps for GLM on unitobs + ## ---------------------------------------------------------------------- + ## Arguments: metrique : selected metric + ## listFact : Factors for GLM + ## listRand : Random factors for GLM + ## factAna : Separation factor for GLMs + ## Distrib : selected distribution for model + ## log : log transformation on data ? boolean + ## tabMetrics : data table metrics + ## tableMetrique : data table's name + ## tabUnitobs : data table unitobs + ## ---------------------------------------------------------------------- + ## Author: Yves Reecht, Date: 18 août 2010, 15:59 modified by Coline ROYAUX 04 june 2020 + + tmpData <- tabMetrics + + if (listRand[1] != "None") + { + if (all(is.element(listFact,listRand)) || listFact[1] == "None") + { + RespFact <- paste("(1|",paste(listRand,collapse=") + (1|"),")") + listF <- NULL + listFact <- listRand + }else{ + listF <- listFact[!is.element(listFact,listRand)] + RespFact <- paste(paste(listF, collapse=" + ")," + (1|",paste(listRand,collapse=") + (1|"),")") + listFact <- c(listF,listRand) + } + }else{ + listF <- listFact + RespFact <- paste(listFact, collapse=" + ") + } + + ##Creating model's expression : + + #if (log == FALSE) { + exprML <- eval(parse(text=paste(metrique, "~", RespFact))) + #}else{ + # exprML <- eval(parse(text=paste("log(",metrique,")", "~", RespFact))) + #} + + ##Creating analysis table : + + listFactTab <- c(listFact, FactAna) + listFactTab <- listFactTab[listFactTab != "None"] + + if (all(is.na(match(tmpData[,unitobs],tabUnitobs[,unitobs])))) {stop("Observation units doesn't match in the two input tables")} + + if(! is.element("species.code",colnames(tmpData))) + { + col <- c(unitobs,metrique) + tmpData <- cbind(tmpData[,col], tabUnitobs[match(tmpData[,unitobs],tabUnitobs[,unitobs]),listFactTab]) + colnames(tmpData) <- c(col,listFactTab) + + for (i in listFactTab) { + switch(i, + tmpData[,i] <- as.factor(tmpData[,i])) + } + }else{ + stop("Warning : wrong data frame, data frame should be aggregated by observation unit (year and site)") + } + + ## Suppress unsed 'levels' : + tmpData <- dropLevels.f(tmpData) + + ## Automatic choice of distribution if none is selected by user : + if (Distrib == "None") + { + switch(class(tmpData[,metrique]), + "integer"={loiChoisie <- "poisson"}, + "numeric"={loiChoisie <- "gaussian"}, + stop("Selected metric class doesn't fit, you should select an integer or a numeric variable")) + }else{ + loiChoisie <- Distrib + } + + + ## Compute Model(s) : + + if (FactAna != "None" && nlevels(tmpData[,FactAna]) > 1) + { + Anacut <- levels(tmpData[,FactAna]) + }else{ + Anacut <- NULL + } + + ##Create results table : + lev <- unlist(lapply(listF,FUN=function(x){levels(tmpData[,x])})) + + if (listRand[1] != "None") ## if random effects + { + TabSum <- data.frame(analysis=c("global", Anacut),AIC=NA,BIC=NA,logLik=NA, deviance=NA,df.resid=NA) + colrand <- unlist(lapply(listRand, + FUN=function(x){lapply(c("Std.Dev","NbObservation","NbLevels"), + FUN=function(y){paste(x,y,collapse = ":") + }) + })) + TabSum[,colrand] <- NA + + if (! is.null(lev)) ## if fixed effects + random effects + { + colcoef <- unlist(lapply(c("(Intercept)",lev), + FUN=function(x){lapply(c("Estimate","Std.Err","Zvalue","Pvalue","signif"), + FUN=function(y){paste(x,y,collapse = ":") + }) + })) + }else{ ## if no fixed effects + colcoef <- NULL + } + + }else{ ## if no random effects + TabSum <- data.frame(analysis=c("global", Anacut),AIC=NA,Resid.deviance=NA,df.resid=NA,Null.deviance=NA,df.null=NA) + + switch(loiChoisie, + "gaussian"={colcoef <- unlist(lapply(c("(Intercept)",lev), + FUN=function(x){lapply(c("Estimate","Std.Err","Tvalue","Pvalue","signif"), + FUN=function(y){paste(x,y,collapse = ":") + }) + }))}, + "quasipoisson"={colcoef <- unlist(lapply(c("(Intercept)",lev), + FUN=function(x){lapply(c("Estimate","Std.Err","Tvalue","Pvalue","signif"), + FUN=function(y){paste(x,y,collapse = ":") + }) + }))}, + colcoef <- unlist(lapply(c("(Intercept)",lev), + FUN=function(x){lapply(c("Estimate","Std.Err","Zvalue","Pvalue","signif"), + FUN=function(y){paste(x,y,collapse = ":") + }) + }))) + + } + + TabSum[,colcoef] <- NA + + ### creating rate table + TabRate <- data.frame(analysis=c("global", Anacut), complete_plan=NA, balanced_plan=NA, NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA) + + for (cut in Anacut) + { + cutData <- tmpData[grep(cut,tmpData[,FactAna]),] + cutData <- dropLevels.f(cutData) + + res <-"" + + if (listRand[1] != "None") + { + res <- tryCatch(glmmTMB(exprML,family=loiChoisie, data=cutData), error=function(e){}) + }else{ + res <- tryCatch(glm(exprML,data=cutData,family=loiChoisie), error=function(e){}) + } + + ## Write results : + if (! is.null(res)) + { + TabSum <- sortiesLM.f(objLM=res, TabSum=TabSum, metrique=metrique, + factAna=factAna, cut=cut, colAna="analysis", lev=lev, #modSel=iFactGraphSel, listFactSel=listFactSel, + listFact=listFact, + Data=cutData, #Log=Log, + type=ifelse(tableMetrique == "unitSpSz" && factAna != "size.class", + "CL_unitobs", + "unitobs")) + + TabRate[TabRate[,"analysis"]==cut,c(2:11)] <- noteGLM.f(data=cutData, objLM=res, metric=metrique, listFact=listFact, details=TRUE) + + }else{ + cat("\nCannot compute GLM for level",cut,"Check if one or more factor(s) have only one level, or try with another distribution for the model in advanced settings \n\n") + } + + } + + ## Global analysis : + + if (listRand[1] != "None") + { + resG <- glmmTMB(exprML,family=loiChoisie, data=tmpData) + }else{ + resG <- glm(exprML,data=tmpData,family=loiChoisie) + } + + ## write results : + TabSum <- sortiesLM.f(objLM=resG, TabSum=TabSum, metrique=metrique, + factAna=factAna, cut="global", colAna="analysis", lev=lev, #modSel=iFactGraphSel, listFactSel=listFactSel, + listFact=listFact, + Data=tmpData, #Log=Log, + type=ifelse(tableMetrique == "unitSpSz" && factAna != "size.class", + "CL_unitobs", + "unitobs")) + + TabRate[TabRate[,"analysis"]=="global",c(2:11)] <- noteGLM.f(data=tmpData, objLM=resG, metric=metrique, listFact=listFact, details=TRUE) + noteGLMs.f(tabRate=TabRate,exprML=exprML,objLM=resG, file_out=TRUE) + ## simple statistics and infos : + filename <- "GLMSummaryFull.txt" + + ## Save data on model : + + infoStats.f(filename=filename, Data=tmpData, agregLevel=aggreg, type="stat", + metrique=metrique, factGraph=factAna, #factGraphSel=modSel, + listFact=listFact)#, listFactSel=listFactSel) + + return(TabSum) + +} + +################# Analysis + +Tab <- modeleLineaireWP2.unitobs.f(metrique=metric, listFact=listFact, listRand=listRand, FactAna=FactAna, Distrib=Distrib, log=log, tabMetrics=obs, tableMetrique=aggreg, tabUnitobs=tabUnitobs, nbName="number") + +write.table(Tab,"GLMSummary.tabular", row.names=FALSE, sep="\t", dec=".",fileEncoding="UTF-8")