comparison BilanEnrichiPF.R @ 0:823a09219c00 draft

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tools/vigiechiro commit d2de8e10c11bfa3b04729e59bba58e08d23b56aa
author ecology
date Wed, 13 Mar 2019 11:18:11 -0400
parents
children 775809e2f6c8
comparison
equal deleted inserted replaced
-1:000000000000 0:823a09219c00
1 suppressMessages(library(data.table))
2 suppressMessages(library(DT))
3 suppressMessages(library(htmlwidgets))
4
5 f2p <- function(x) #get date-time data from recording file names
6 {
7 if (is.data.frame((x)[1])) {pretemps <- vector(length = nrow(x))}
8 op <- options(digits.secs = 3)
9 pretemps <- paste(substr(x, nchar(x) - 18, nchar(x)-4), ".", substr(x, nchar(x) - 2, nchar(x)), sep = "")
10 strptime(pretemps, "%Y%m%d_%H%M%OS",tz="UTC")-7200
11 }
12
13 args <- commandArgs(trailingOnly = TRUE)
14 EchelleErreur=c("NA","POSSIBLE","PROBABLE","SUR")
15 EchelleNumErreur=c(99,50,10,1)
16
17
18 IdC2=fread(args[1])
19 refPF=fread(args[2])
20 GroupList=fread(args[3])
21
22 if(substr(IdC2$`nom du fichier`[1],2,2)!="a")
23 {
24 print("Protocole non conforme, ce script doit etre lance pour un protocole Point Fixe")
25 }else{
26
27
28 #compute error risk by species (minimum error among files)
29 #to be replaced by glm outputs if I'll have time
30 RisqueErreurT=aggregate(IdC2$IdProb,by=list(IdC2$IdExtrap),FUN=function(x) ceiling((1-max(x-0.0001))*100))
31
32 #barplot(RisqueErreurT$x,names.arg=RisqueErreurT$Group.1,las=2)
33 #compute error risk accoring to observer/validator (a little dirty because it relies on alphabetical order of confidence classes: POSSIBLE < PROBABLE < SUR)
34 RisqueErreurOV0=match(IdC2$ConfV,EchelleErreur)
35 RisqueErreurOV=aggregate(RisqueErreurOV0,by=list(IdC2$IdExtrap)
36 ,FUN=max)
37 RisqueErreurOV2=EchelleNumErreur[RisqueErreurOV$x]
38 #compute minimum error risk between man and machine
39 RisqueErreur=pmin(RisqueErreurT$x,RisqueErreurOV2,na.rm=TRUE)
40
41 #compute number of files validated per species
42 FichValid=aggregate(IdC2$IdV,by=list(IdC2$IdExtrap,IdC2$'nom du fichier')
43 ,FUN=function(x) sum(!is.na(x)))
44 NbValid2=aggregate(FichValid$x,by=list(FichValid$Group.1),FUN=function(x) sum(x>0))
45
46 DiffC50=vector() # to store the median of confidence difference between unvalidated records and validated ones
47 DiffT50=vector() # to store the median of time difference between unvalidated records and validated ones
48 for (j in 1:nlevels(as.factor(IdC2$IdExtrap)))
49 {
50 IdSp=subset(IdC2
51 ,IdC2$IdExtrap==levels(as.factor(IdC2$IdExtrap))[j])
52 IdSp=IdSp[order(IdSp$IdProb),]
53 IdSpV=subset(IdSp,!is.na(IdSp$IdV))
54 if(nrow(IdSpV)>0)
55 {
56 cuts <- c(-Inf, IdSpV$IdProb[-1]-diff(IdSpV$IdProb)/2, Inf)
57 CorrC=findInterval(IdSp$IdProb, cuts)
58 CorrC2=IdSpV$IdProb[CorrC]
59 DiffC=abs(IdSp$IdProb-CorrC2)
60 DiffC50=c(DiffC50,median(DiffC))
61
62 IdSp=IdSp[order(IdSp$TimeNum),]
63 IdSpV=subset(IdSp,!is.na(IdSp$IdV))
64 cuts <- c(-Inf, IdSpV$TimeNum[-1]-diff(IdSpV$TimeNum)/2, Inf)
65 CorrT=findInterval(IdSp$TimeNum, cuts)
66 CorrT2=IdSpV$TimeNum[CorrT]
67 DiffT=abs(IdSp$TimeNum-CorrT2)
68 DiffT50=c(DiffT50,median(DiffT))
69 }else{
70 DiffC50=c(DiffC50,Inf)
71 DiffT50=c(DiffT50,Inf)
72 }
73 }
74 #compute an index of validation effort per species
75 EffortV=1/DiffC50/DiffT50
76 EffortClass=(EffortV>0.0005)+(EffortV>0.005)+RisqueErreurOV$x
77 cbind(RisqueErreurOV,EffortV,DiffC50,DiffT50)
78 #barplot(EffortClass-1,names.arg=NbValid2$Group.1,las=2)
79 ClassEffortV=c("-","FAIBLE","SUFFISANT","SUFFISANT","FORT","FORT")
80 EffortClassMot=ClassEffortV[EffortClass]
81
82
83 #get date-night
84 pourDateNuit=IdC2$TimeNum-12*3600 #bricolage-decalage de 12 heures pour ramener a la date du debut de nuit
85 DateNuit=as.Date.POSIXct(pourDateNuit) # date of the beginning of the night
86 DateJour=as.Date.POSIXct(IdC2$TimeNum) # date (UTC+0)
87 IdC2$DateNuit=DateNuit
88 IdC2$DateJour=DateJour
89 NbNuit=as.numeric(max(IdC2$DateNuit)-min(IdC2$DateNuit))+1
90
91 #compare activity / reference frame
92 ActMoy=aggregate(IdC2$`nom du fichier`
93 ,by=list(IdC2$IdExtrap),FUN=function(x) length(x)/NbNuit)
94 ListSpref=match(ActMoy$Group.1,refPF$Espece)
95 Subref=refPF[ListSpref]
96 QualifAct=vector()
97 for (k in 1:nrow(ActMoy))
98 {
99 if(is.na(Subref$Q25[k]))
100 {
101 QualifAct=c(QualifAct,NA)
102 }else{
103 cuts=cbind(-Inf,as.numeric(Subref$Q25[k]),as.numeric(Subref$Q75[k])
104 ,as.numeric(Subref$Q98[k]),Inf)
105
106 QualifAct=c(QualifAct,findInterval(ActMoy$x[k],cuts,left.open=T))
107 }
108 }
109 ClassAct=c("FAIBLE","MODEREE","FORTE","TRES FORTE")
110 QualifActMot=ClassAct[QualifAct]
111
112 #compute activity by nights (to be completed)
113 #ActNuit=aggregate(IdC2$`nom du fichier`,by=list(IdC2$DateNuit,IdC2$IdExtrap),FUN=length)
114
115 #organize the csv summary
116 SummPart0=cbind(Esp=levels(as.factor(IdC2$IdExtrap))
117 ,RisqueErreur,NbValid=NbValid2$x,EffortValid=EffortClassMot
118 ,Contacts_Nuit=round(ActMoy$x),Niveau_Activite=QualifActMot)
119
120
121 InfoSp=c("GroupFR","NomFR","Scientific name","Esp")
122 GroupShort=GroupList[,InfoSp,with=FALSE]
123 #GroupShort=GroupList[,..InfoSp]
124 SummPart=merge(GroupShort,SummPart0,by="Esp")
125 IndexGroupe=c("Autre","Sauterelle","Chauve-souris")
126 SummPart$IndexSumm=match(SummPart$GroupFR,IndexGroupe)
127 SummPart=SummPart[with(SummPart
128 ,order(IndexSumm,as.numeric(Contacts_Nuit),decreasing=T)),]
129 colnames(SummPart)=c("Code","Groupe","Nom francais","Nom scientifique"
130 ,"Risque d'erreur (%)","Nb Validations"
131 ,"Effort de validation","Nb de Contacts par Nuit"
132 ,"Niveau d'Activite","TriGroupe")
133
134 #to do: extend colors to other columns to improve readability
135 SummHTML=datatable(SummPart, rownames = FALSE) %>%
136 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
137 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
138 formatStyle(columns = "Effort de validation",
139 background = styleEqual(c("-","FAIBLE","SUFFISANT","FORT"), c("white", "cyan", "royalblue", "darkblue"))) %>%
140 formatStyle(columns = c("Nb de Contacts par Nuit","Niveau d'Activite"),valueColumns="Niveau d'Activite",
141 background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen")))
142
143 saveWidget(SummHTML,"output-summary.html")
144 write.table(SummPart,"output-summary.tabular",sep="\t",row.names=F,quote=FALSE)
145
146 #compute number of files validated per night/hour
147 IdC2$Heure=sapply(IdC2$`nom du fichier`,FUN=function(x) substr(x,nchar(x)-9,nchar(x)-8))
148
149 ActNuit=aggregate(IdC2$`nom du fichier`,by=list(IdC2$IdExtrap,IdC2$Session),FUN=length)
150 ListSpref=match(ActNuit$Group.1,refPF$Espece)
151 Subref=refPF[ListSpref]
152
153
154 QualifActN=vector()
155 for (k in 1:nrow(ActNuit))
156 {
157 if(is.na(Subref$Q25[k]))
158 {
159 QualifActN=c(QualifActN,NA)
160 }else{
161 cuts=cbind(-Inf,as.numeric(Subref$Q25[k]),as.numeric(Subref$Q75[k])
162 ,as.numeric(Subref$Q98[k]),Inf)
163
164 QualifActN=c(QualifActN,findInterval(ActNuit$x[k],cuts,left.open=T))
165 }
166 }
167 ActNuit$QualifActN=QualifActN
168
169 ActNuitT=dcast(data=ActNuit,formula=Group.1~Group.2
170 ,value.var="x")
171 ActNuitT[is.na(ActNuitT)]=0
172 RefNuitT=dcast(data=ActNuit,formula=Group.1~Group.2
173 ,value.var="QualifActN")
174 ARNuit=merge(ActNuitT,RefNuitT,by="Group.1")
175
176 SummPartshort=cbind(SummPart[,c(1:5)],TriGroupe=SummPart[,TriGroupe])
177 SummPartN=merge(SummPartshort,ARNuit,by.x="Code",by.y="Group.1")
178 SummPartN=SummPartN[order(TriGroupe,decreasing=T),]
179
180 test=grepl(".x",colnames(SummPartN))
181 colnames(SummPartN)=mapply(FUN=function(x,y) if(y){substr(x,1,2)}else{x}
182 ,colnames(SummPartN),test)
183 ListNuit=subset(colnames(SummPartN),test)
184 ListRef=subset(colnames(SummPartN),grepl(".y",colnames(SummPartN)))
185 testHide=match(ListRef,colnames(SummPartN))-1
186 #to do: extend colors to other columns to improve readability
187 SummHTMLN=datatable(SummPartN, rownames = FALSE,options = list(
188 columnDefs = list(list(targets = testHide,visible = FALSE)))) %>%
189 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
190 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
191 formatStyle(columns = ListNuit,valueColumns=ListRef,
192 background = styleEqual(c(1,2,3,4), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen")))
193
194 saveWidget(SummHTMLN,"output-nightly.html")
195 write.table(SummPartN,"output-nightly.tabular",sep="\t",row.names=F,quote=FALSE)
196
197
198 #summary by hour
199 ActMoyH=dcast(data=IdC2,formula=IdExtrap~Heure
200 ,fun.aggregate=length)
201 ActMoyHA=aggregate(IdC2$`nom du fichier`
202 ,by=list(IdC2$IdExtrap,IdC2$Heure)
203 ,FUN=length)
204
205 test=(as.numeric(colnames(ActMoyH))>12)
206 ColDebut=subset(colnames(ActMoyH),test)
207 ColFin=subset(colnames(ActMoyH),test==F)
208 ListH=c(ColDebut,ColFin)
209 neworder=c("IdExtrap",ColDebut,ColFin)
210 ActMoyH=ActMoyH[,..neworder]
211
212 SummPartH=merge(SummPartshort,ActMoyH,by.x="Code",by.y="IdExtrap")
213 SummPartH=SummPartH[order(TriGroupe,decreasing=T),]
214
215
216 brks <- quantile(ActMoyHA$x, probs = seq(.05, .95, .05), na.rm = TRUE)-1
217 clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>%
218 {paste0("rgb(255,", ., ",", ., ")")}
219
220
221 SummHTMLH=datatable(SummPartH, rownames = FALSE) %>%
222 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
223 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
224 formatStyle(columns=ListH, backgroundColor = styleInterval(brks, clrs))
225
226 saveWidget(SummHTMLH,"output-hourly.html")
227 write.table(SummPartH,"output-hourly.tabular",sep="\t",row.names=F,quote=FALSE)
228
229 }