0
|
1 library(data.table)
|
|
2 library(DT)
|
|
3 library(htmlwidgets)
|
|
4
|
|
5 args <- commandArgs(trailingOnly = TRUE)
|
|
6 #print(args)
|
|
7 EchelleErreur=c("","POSSIBLE","PROBABLE","SUR")
|
|
8 EchelleNumErreur=c(99,50,10,1)
|
|
9
|
|
10 #for test
|
|
11 #inputest=list.files("C:/Users/Yves Bas/Documents/GitHub/65MO_Galaxy-E/raw_scripts/Vigie-Chiro/output_IdValid_input_BilanEnrichi/",pattern="IdC2.csv",full.names=T)
|
|
12 #for (i in 1:length(inputest))
|
|
13 #{
|
|
14 # args=c(inputest[i],"refRPSeuil50.csv","SpeciesList.csv")
|
|
15
|
|
16 IdC2=fread(args[1])
|
|
17 refRP=fread(args[2])
|
|
18 GroupList=fread(args[3])
|
|
19
|
|
20
|
|
21 if(substr(IdC2$`nom du fichier`[1],2,2)!="i")
|
|
22 {
|
|
23 print("Protocole non conforme, ce script doit etre lance pour un protocole Routier ou Pedestre")
|
|
24 }else{
|
|
25
|
|
26 Routier=grepl("-",substr(IdC2$`nom du fichier`[1],4,7))
|
|
27 #compute error risk by species (minimum error among files)
|
|
28 #to be replaced by glm outputs if I'll have time
|
|
29 RisqueErreurT=aggregate(IdC2$IdProb,by=list(IdC2$IdExtrap),FUN=function(x) ceiling((1-max(x-0.0001))*100))
|
|
30 barplot(RisqueErreurT$x,names.arg=RisqueErreurT$Group.1,las=2)
|
|
31 #compute error risk accoring to observer/validator (a little dirty because it relies on alphabetical order of confidence classes: POSSIBLE < PROBABLE < SUR)
|
|
32 RisqueErreurOV0=match(IdC2$ConfV,EchelleErreur)
|
|
33 RisqueErreurOV=aggregate(RisqueErreurOV0,by=list(IdC2$IdExtrap)
|
|
34 ,FUN=max)
|
|
35 RisqueErreurOV2=EchelleNumErreur[RisqueErreurOV$x]
|
|
36 #compute minimum error risk between man and machine
|
|
37 RisqueErreur=pmin(RisqueErreurT$x,RisqueErreurOV2)
|
|
38
|
|
39 #compute number of files validated per species
|
|
40 FichValid=aggregate(IdC2$IdV,by=list(IdC2$IdExtrap,IdC2$'nom du fichier')
|
|
41 ,FUN=function(x) sum(x!=""))
|
|
42 NbValid2=aggregate(FichValid$x,by=list(FichValid$Group.1),FUN=function(x) sum(x>0))
|
|
43
|
|
44 DiffC50=vector() # to store the median of confidence difference between unvalidated records and validated ones
|
|
45 DiffT50=vector() # to store the median of time difference between unvalidated records and validated ones
|
|
46 for (j in 1:nlevels(as.factor(IdC2$IdExtrap)))
|
|
47 {
|
|
48 IdSp=subset(IdC2
|
|
49 ,IdC2$IdExtrap==levels(as.factor(IdC2$IdExtrap))[j])
|
|
50 IdSp$IdProb[is.na(IdSp$IdProb)]=0
|
|
51 IdSp=IdSp[order(IdSp$IdProb),]
|
|
52 IdSpV=subset(IdSp,IdSp$IdV!="")
|
|
53 if(nrow(IdSpV)>0)
|
|
54 {
|
|
55 cuts <- c(-Inf, IdSpV$IdProb[-1]-diff(IdSpV$IdProb)/2, Inf)
|
|
56 CorrC=findInterval(IdSp$IdProb, cuts)
|
|
57 CorrC2=IdSpV$IdProb[CorrC]
|
|
58 DiffC=abs(IdSp$IdProb-CorrC2)
|
|
59 DiffC50=c(DiffC50,median(DiffC))
|
|
60
|
|
61 IdSp=IdSp[order(IdSp$TimeNum),]
|
|
62 IdSpV=subset(IdSp,IdSp$IdV!="")
|
|
63 cuts <- c(-Inf, IdSpV$TimeNum[-1]-diff(IdSpV$TimeNum)/2, Inf)
|
|
64 CorrT=findInterval(IdSp$TimeNum, cuts)
|
|
65 CorrT2=IdSpV$TimeNum[CorrT]
|
|
66 DiffT=abs(IdSp$TimeNum-CorrT2)
|
|
67 DiffT50=c(DiffT50,median(DiffT))
|
|
68 }else{
|
|
69 DiffC50=c(DiffC50,Inf)
|
|
70 DiffT50=c(DiffT50,Inf)
|
|
71 }
|
|
72 }
|
|
73 #compute an index of validation effort per species
|
|
74 EffortV=1/DiffC50/DiffT50
|
|
75 EffortClass=(EffortV>0.0005)+(EffortV>0.005)+RisqueErreurOV$x
|
|
76 #cbind(RisqueErreurOV,EffortV,DiffC50,DiffT50)
|
|
77 barplot(EffortClass-1,names.arg=NbValid2$Group.1,las=2)
|
|
78 ClassEffortV=c("-","FAIBLE","SUFFISANT","SUFFISANT","FORT","FORT")
|
|
79 EffortClassMot=ClassEffortV[EffortClass]
|
|
80
|
|
81
|
|
82 #compare activity / reference frame
|
|
83 FileInfo=as.data.table(tstrsplit(IdC2$`nom du fichier`,"-"))
|
|
84 IdC2$Tron=FileInfo$V4
|
|
85
|
|
86 MicTempsInfo=as.data.table(tstrsplit(as.data.frame(FileInfo)[,(ncol(FileInfo))],"_"))
|
|
87 MicDroit=(as.data.frame(MicTempsInfo)[,(ncol(MicTempsInfo)-2)]=="1")
|
|
88 IdC2$MicDroit=MicDroit
|
|
89
|
|
90 testTempsFin=aggregate(IdC2$temps_fin,by=list(MicDroit),FUN=max)
|
|
91 testTempsFin$Direct=(testTempsFin$x>0.5)
|
|
92 testTF2=sum((testTempsFin$x>0.5))
|
|
93 if(testTF2>1){stop("Probleme stereo : les 2 canaux semblent etre en enregistrement direct")}
|
|
94 IdC2M=merge(IdC2,testTempsFin,by.x="MicDroit",by.y="Group.1")
|
|
95
|
|
96 ActMoy=aggregate(IdC2$`nom du fichier`
|
|
97 ,by=list(IdC2M$IdExtrap,IdC2M$Direct),FUN=length)
|
|
98 ListSpref=match(levels(as.factor(ActMoy$Group.1)),refRP$Espece)
|
|
99 Subref=refRP[ListSpref]
|
|
100 if(Routier)
|
|
101 {
|
|
102 Subref=Subref[,c(1:17)]
|
|
103 }else{
|
|
104 Subref=Subref[,c(1,18:33)]
|
|
105 }
|
|
106 QualifActE=vector()
|
|
107 QualifActD=vector()
|
|
108
|
|
109 for (k in 1:nlevels(as.factor(ActMoy$Group.1)))
|
|
110 {
|
|
111 Actsub=subset(ActMoy,ActMoy$Group.1==levels(as.factor(ActMoy$Group.1))[k])
|
|
112 if(is.na(Subref[k,2]))
|
|
113 {
|
|
114 QualifActE=c(QualifActE,NA)
|
|
115 QualifActD=c(QualifActD,NA)
|
|
116 }else{
|
|
117 ActE=subset(Actsub,Actsub$Group.2==F)
|
|
118 if(nrow(ActE)==0)
|
|
119 {
|
|
120 QualifActE=c(QualifActE,NA)
|
|
121
|
|
122 }else{
|
|
123 cuts=cbind(-Inf,as.numeric(Subref[k,6]),as.numeric(Subref[k,7])
|
|
124 ,as.numeric(Subref[k,8]),Inf)
|
|
125 QualifActE=c(QualifActE,findInterval(ActE$x,cuts,left.open=T))
|
|
126 }
|
|
127 ActD=subset(Actsub,Actsub$Group.2==T)
|
|
128 if(nrow(ActD)==0)
|
|
129 {
|
|
130 QualifActD=c(QualifActD,NA)
|
|
131
|
|
132 }else{
|
|
133 cuts=cbind(-Inf,as.numeric(Subref[k,14]),as.numeric(Subref[k,15])
|
|
134 ,as.numeric(Subref[k,16]),Inf)
|
|
135 QualifActD=c(QualifActD,findInterval(ActD$x,cuts,left.open=T))
|
|
136 }
|
|
137
|
|
138 }
|
|
139 }
|
|
140 ClassAct=c("FAIBLE","MODEREE","FORTE","TRES FORTE")
|
|
141 QualifActMotE=ClassAct[QualifActE]
|
|
142 QualifActMotD=ClassAct[QualifActD]
|
|
143
|
|
144 #compute activity by nights (to be completed)
|
|
145 #ActNuit=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$DateNuit,IdC2M$IdExtrap),FUN=length)
|
|
146 ActED=dcast(data=ActMoy,formula=Group.1~Group.2,value=x)
|
|
147 ActED[is.na(ActED)]=0
|
|
148 #organize the csv summary
|
|
149 SummPart0=cbind(Esp=levels(as.factor(IdC2M$IdExtrap))
|
|
150 ,RisqueErreur,NbValid=NbValid2$x,EffortValid=EffortClassMot)
|
|
151
|
|
152 test=match("FALSE",colnames(ActED))
|
|
153 if(is.na(test)==F)
|
|
154 {
|
|
155 SummPart0=cbind(SummPart0,Contacts_Expansion=ActED$'FALSE'
|
|
156 ,Niveau_Activite_Expansion=QualifActMotE)
|
|
157 }else{
|
|
158 SummPart0=cbind(SummPart0,Contacts_Expansion=""
|
|
159 ,Niveau_Activite_Expansion="")
|
|
160 }
|
|
161 test=match("TRUE",colnames(ActED))
|
|
162 if(is.na(test)==F)
|
|
163 {
|
|
164
|
|
165 SummPart0=cbind(SummPart0,Contacts_Direct=ActED$'TRUE'
|
|
166 ,Niveau_Activite_Direct=QualifActMotD)
|
|
167 }else{
|
|
168 SummPart0=cbind(SummPart0,Contacts_Direct=""
|
|
169 ,Niveau_Activite_Direct="")
|
|
170 }
|
|
171
|
|
172 InfoSp=c("GroupFR","NomFR","Scientific name","Esp")
|
|
173 GroupShort=GroupList[,..InfoSp]
|
|
174 SummPart=merge(GroupShort,SummPart0,by="Esp")
|
|
175 IndexGroupe=c("Autre","Sauterelle","Chauve-souris")
|
|
176 SummPart$IndexSumm=match(SummPart$GroupFR,IndexGroupe)
|
|
177 SummPart=SummPart[with(SummPart
|
|
178 ,order(IndexSumm,as.numeric(Contacts_Direct),as.numeric(Contacts_Expansion),decreasing=T)),]
|
|
179 colnames(SummPart)=c("Code","Groupe","Nom francais","Nom scientifique"
|
|
180 ,"Risque d'erreur (%)","Nb Validations"
|
|
181 ,"Effort de validation","Nb de Contacts en expansion"
|
|
182 ,"Niveau d'Activite en expansion"
|
|
183 ,"Nb de Contacts en direct"
|
|
184 ,"Niveau d'Activite en direct","TriGroupe")
|
|
185
|
|
186 #to do: extend colors to other columns to improve readability
|
|
187 SummHTML=datatable(SummPart, rownames = FALSE) %>%
|
|
188 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
|
|
189 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
|
|
190 formatStyle(columns = "Effort de validation",
|
|
191 background = styleEqual(c("-","FAIBLE","SUFFISANT","FORT"), c("white", "cyan", "royalblue", "darkblue"))) %>%
|
|
192 formatStyle(columns = c("Nb de Contacts en expansion","Niveau d'Activite en expansion"),valueColumns="Niveau d'Activite en expansion",
|
|
193 background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen"))) %>%
|
|
194 formatStyle(columns = c("Nb de Contacts en direct","Niveau d'Activite en direct"),valueColumns="Niveau d'Activite en direct",
|
|
195 background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen")))
|
|
196
|
|
197
|
|
198 saveWidget(SummHTML,"output-summaryRP.html")
|
|
199 write.table(SummPart,"output-summaryRP.tabular",row.names=F,sep="\t")
|
|
200
|
|
201 #summary for each point/transect
|
|
202
|
|
203 #compute number of files validated per species
|
|
204 IdC2M$Canal=sapply(IdC2M$Direct,FUN=function(x) if(x){"Direct"}else{"Expansion"})
|
|
205
|
|
206 ActMoyTA=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$IdExtrap,IdC2M$Canal,IdC2M$Session),FUN=length)
|
|
207 ActMoyT=dcast(data=IdC2M,formula=IdExtrap+Canal~Session
|
|
208 ,fun.aggregate=length)
|
|
209 SummPartshort=cbind(SummPart[,c(1:5)],TriGroupe=SummPart[,TriGroupe])
|
|
210 SummPartTron=merge(SummPartshort,ActMoyT,by.x="Code",by.y="IdExtrap")
|
|
211 SummPartTron=SummPartTron[order(TriGroupe,decreasing=T),]
|
|
212
|
|
213 ListSession=levels(as.factor(IdC2M$Session))
|
|
214 brks <- quantile(ActMoyTA$x, probs = seq(.05, .95, .05), na.rm = TRUE)-1
|
|
215 clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>%
|
|
216 {paste0("rgb(255,", ., ",", ., ")")}
|
|
217
|
|
218
|
|
219 #to do: extend colors to other columns to improve readability
|
|
220 SummHTMLT=datatable(SummPartTron, rownames = FALSE) %>%
|
|
221 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
|
|
222 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
|
|
223 formatStyle(columns=ListSession, backgroundColor = styleInterval(brks, clrs))
|
|
224
|
|
225 saveWidget(SummHTMLT,"output-detailRP.html")
|
|
226 write.table(SummPartTron,"output-detailRP.tabular",row.names=F,sep="\t")
|
|
227
|
|
228 #saveWidget(SummHTML,paste0(substr(args[1],1,nchar(args[1])-9),"-summary.html"))
|
|
229 #write.table(SummPart,paste0(substr(args[1],1,nchar(args[1])-9),"-summary.csv"),row.names=F,sep="\t")
|
|
230 #saveWidget(SummHTMLT,paste0(substr(args[1],1,nchar(args[1])-9),"-detail.html"))
|
|
231 #write.table(SummPartTron,paste0(substr(args[1],1,nchar(args[1])-9),"-detail.csv"),row.names=F,sep="\t")
|
|
232
|
|
233
|
|
234
|
|
235 }
|
|
236
|
|
237
|
|
238
|