comparison BilanEnrichiRP.R @ 0:c55e09a8b4c8 draft

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tools/vigiechiro commit d2de8e10c11bfa3b04729e59bba58e08d23b56aa
author ecology
date Wed, 13 Mar 2019 11:17:44 -0400
parents
children be4e28da3919
comparison
equal deleted inserted replaced
-1:000000000000 0:c55e09a8b4c8
1 suppressMessages(library(data.table))
2 suppressMessages(library(DT))
3 suppressMessages(library(htmlwidgets))
4
5 args <- commandArgs(trailingOnly = TRUE)
6 EchelleErreur=c("NA","POSSIBLE","PROBABLE","SUR")
7 EchelleNumErreur=c(99,50,10,1)
8
9
10 IdC2=fread(args[1])
11 refRP=fread(args[2])
12 GroupList=fread(args[3])
13
14
15 if(substr(IdC2$`nom du fichier`[1],2,2)!="i")
16 {
17 stop("Protocole non conforme, ce script doit etre lance pour un protocole Routier ou Pedestre",call.=FALSE)
18 }
19
20 Routier=grepl("-",substr(IdC2$`nom du fichier`[1],4,7))
21 #compute error risk by species (minimum error among files)
22 #to be replaced by glm outputs if I'll have time
23 RisqueErreurT=aggregate(IdC2$IdProb,by=list(IdC2$IdExtrap),FUN=function(x) ceiling((1-max(x-0.0001))*100))
24 #barplot(RisqueErreurT$x,names.arg=RisqueErreurT$Group.1,las=2)
25 #compute error risk accoring to observer/validator (a little dirty because it relies on alphabetical order of confidence classes: POSSIBLE < PROBABLE < SUR)
26 RisqueErreurOV0=match(IdC2$ConfV,EchelleErreur)
27 RisqueErreurOV=aggregate(RisqueErreurOV0,by=list(IdC2$IdExtrap)
28 ,FUN=max)
29 RisqueErreurOV2=EchelleNumErreur[RisqueErreurOV$x]
30 #compute minimum error risk between man and machine
31 RisqueErreur=pmin(RisqueErreurT$x,RisqueErreurOV2,na.rm=TRUE)
32
33 #compute number of files validated per species
34 FichValid=aggregate(IdC2$IdV,by=list(IdC2$IdExtrap,IdC2$'nom du fichier')
35 ,FUN=function(x) sum(!is.na(x)))
36 NbValid2=aggregate(FichValid$x,by=list(FichValid$Group.1),FUN=function(x) sum(x>0))
37
38 DiffC50=vector() # to store the median of confidence difference between unvalidated records and validated ones
39 DiffT50=vector() # to store the median of time difference between unvalidated records and validated ones
40 for (j in 1:nlevels(as.factor(IdC2$IdExtrap)))
41 {
42 IdSp=subset(IdC2
43 ,IdC2$IdExtrap==levels(as.factor(IdC2$IdExtrap))[j])
44 IdSp$IdProb[is.na(IdSp$IdProb)]=0
45 IdSp=IdSp[order(IdSp$IdProb),]
46 IdSpV=subset(IdSp,!is.na(IdSp$IdV))
47 if(nrow(IdSpV)>0)
48 {
49 cuts <- c(-Inf, IdSpV$IdProb[-1]-diff(IdSpV$IdProb)/2, Inf)
50 CorrC=findInterval(IdSp$IdProb, cuts)
51 CorrC2=IdSpV$IdProb[CorrC]
52 DiffC=abs(IdSp$IdProb-CorrC2)
53 DiffC50=c(DiffC50,median(DiffC))
54
55 IdSp=IdSp[order(IdSp$TimeNum),]
56 IdSpV=subset(IdSp,!is.na(IdSp$IdV))
57 cuts <- c(-Inf, IdSpV$TimeNum[-1]-diff(IdSpV$TimeNum)/2, Inf)
58 CorrT=findInterval(IdSp$TimeNum, cuts)
59 CorrT2=IdSpV$TimeNum[CorrT]
60 DiffT=abs(IdSp$TimeNum-CorrT2)
61 DiffT50=c(DiffT50,median(DiffT))
62 }else{
63 DiffC50=c(DiffC50,Inf)
64 DiffT50=c(DiffT50,Inf)
65 }
66 }
67 #compute an index of validation effort per species
68 EffortV=1/DiffC50/DiffT50
69 EffortClass=(EffortV>0.0005)+(EffortV>0.005)+RisqueErreurOV$x
70 #cbind(RisqueErreurOV,EffortV,DiffC50,DiffT50)
71 #barplot(EffortClass-1,names.arg=NbValid2$Group.1,las=2)
72 ClassEffortV=c("-","FAIBLE","SUFFISANT","SUFFISANT","FORT","FORT")
73 EffortClassMot=ClassEffortV[EffortClass]
74
75
76 #compare activity / reference frame
77 FileInfo=as.data.table(tstrsplit(IdC2$`nom du fichier`,"-"))
78 IdC2$Tron=FileInfo$V4
79
80 MicTempsInfo=as.data.table(tstrsplit(as.data.frame(FileInfo)[,(ncol(FileInfo))],"_"))
81 MicDroit=(as.data.frame(MicTempsInfo)[,(ncol(MicTempsInfo)-2)]=="1")
82 IdC2$MicDroit=MicDroit
83
84 testTempsFin=aggregate(IdC2$temps_fin,by=list(MicDroit),FUN=max)
85 testTempsFin$Direct=(testTempsFin$x>0.5)
86 testTF2=sum((testTempsFin$x>0.5))
87 if(testTF2>1){stop("Probleme stereo : les 2 canaux semblent etre en enregistrement direct")}
88 IdC2M=merge(IdC2,testTempsFin,by.x="MicDroit",by.y="Group.1")
89
90 ActMoy=aggregate(IdC2$`nom du fichier`
91 ,by=list(IdC2M$IdExtrap,IdC2M$Direct),FUN=length)
92 ListSpref=match(levels(as.factor(ActMoy$Group.1)),refRP$Espece)
93 Subref=refRP[ListSpref]
94 if(Routier)
95 {
96 Subref=Subref[,c(1:17)]
97 }else{
98 Subref=Subref[,c(1,18:33)]
99 }
100 QualifActE=vector()
101 QualifActD=vector()
102
103 for (k in 1:nlevels(as.factor(ActMoy$Group.1)))
104 {
105 Actsub=subset(ActMoy,ActMoy$Group.1==levels(as.factor(ActMoy$Group.1))[k])
106 if(is.na(Subref[k,2]))
107 {
108 QualifActE=c(QualifActE,NA)
109 QualifActD=c(QualifActD,NA)
110 }else{
111 ActE=subset(Actsub,Actsub$Group.2==F)
112 if(nrow(ActE)==0)
113 {
114 QualifActE=c(QualifActE,NA)
115
116 }else{
117 cuts=cbind(-Inf,as.numeric(Subref[k,6]),as.numeric(Subref[k,7])
118 ,as.numeric(Subref[k,8]),Inf)
119 QualifActE=c(QualifActE,findInterval(ActE$x,cuts,left.open=T))
120 }
121 ActD=subset(Actsub,Actsub$Group.2==T)
122 if(nrow(ActD)==0)
123 {
124 QualifActD=c(QualifActD,NA)
125
126 }else{
127 cuts=cbind(-Inf,as.numeric(Subref[k,14]),as.numeric(Subref[k,15])
128 ,as.numeric(Subref[k,16]),Inf)
129 QualifActD=c(QualifActD,findInterval(ActD$x,cuts,left.open=T))
130 }
131
132 }
133 }
134 ClassAct=c("FAIBLE","MODEREE","FORTE","TRES FORTE")
135 QualifActMotE=ClassAct[QualifActE]
136 QualifActMotD=ClassAct[QualifActD]
137
138 #compute activity by nights (to be completed)
139 #ActNuit=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$DateNuit,IdC2M$IdExtrap),FUN=length)
140 ActED=dcast(data=ActMoy,formula=Group.1~Group.2,value=x)
141 ActED[is.na(ActED)]=0
142 #organize the csv summary
143 SummPart0=cbind(Esp=levels(as.factor(IdC2M$IdExtrap))
144 ,RisqueErreur,NbValid=NbValid2$x,EffortValid=EffortClassMot)
145
146 test=match("FALSE",colnames(ActED))
147 if(is.na(test)==F)
148 {
149 SummPart0=cbind(SummPart0,Contacts_Expansion=ActED$'FALSE'
150 ,Niveau_Activite_Expansion=QualifActMotE)
151 }else{
152 SummPart0=cbind(SummPart0,Contacts_Expansion=""
153 ,Niveau_Activite_Expansion="")
154 }
155 test=match("TRUE",colnames(ActED))
156 if(is.na(test)==F)
157 {
158
159 SummPart0=cbind(SummPart0,Contacts_Direct=ActED$'TRUE'
160 ,Niveau_Activite_Direct=QualifActMotD)
161 }else{
162 SummPart0=cbind(SummPart0,Contacts_Direct=""
163 ,Niveau_Activite_Direct="")
164 }
165
166 InfoSp=c("GroupFR","NomFR","Scientific name","Esp")
167 GroupShort=GroupList[,..InfoSp]
168 SummPart=merge(GroupShort,SummPart0,by="Esp")
169 IndexGroupe=c("Autre","Sauterelle","Chauve-souris")
170 SummPart$IndexSumm=match(SummPart$GroupFR,IndexGroupe)
171 SummPart=SummPart[with(SummPart
172 ,order(IndexSumm,as.numeric(Contacts_Direct),as.numeric(Contacts_Expansion),decreasing=T)),]
173 colnames(SummPart)=c("Code","Groupe","Nom francais","Nom scientifique"
174 ,"Risque d'erreur (%)","Nb Validations"
175 ,"Effort de validation","Nb de Contacts en expansion"
176 ,"Niveau d'Activite en expansion"
177 ,"Nb de Contacts en direct"
178 ,"Niveau d'Activite en direct","TriGroupe")
179
180 #to do: extend colors to other columns to improve readability
181 SummHTML=datatable(SummPart, rownames = FALSE) %>%
182 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
183 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
184 formatStyle(columns = "Effort de validation",
185 background = styleEqual(c("-","FAIBLE","SUFFISANT","FORT"), c("white", "cyan", "royalblue", "darkblue"))) %>%
186 formatStyle(columns = c("Nb de Contacts en expansion","Niveau d'Activite en expansion"),valueColumns="Niveau d'Activite en expansion",
187 background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen"))) %>%
188 formatStyle(columns = c("Nb de Contacts en direct","Niveau d'Activite en direct"),valueColumns="Niveau d'Activite en direct",
189 background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen")))
190
191
192 saveWidget(SummHTML,"output-summaryRP.html")
193 write.table(SummPart,"output-summaryRP.tabular",row.names=F,sep="\t",quote=FALSE)
194
195 #summary for each point/transect
196
197 #compute number of files validated per species
198 IdC2M$Canal=sapply(IdC2M$Direct,FUN=function(x) if(x){"Direct"}else{"Expansion"})
199
200 ActMoyTA=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$IdExtrap,IdC2M$Canal,IdC2M$Session),FUN=length)
201 ActMoyT=dcast(data=IdC2M,formula=IdExtrap+Canal~Session
202 ,fun.aggregate=length)
203 SummPartshort=cbind(SummPart[,c(1:5)],TriGroupe=SummPart[,TriGroupe])
204 SummPartTron=merge(SummPartshort,ActMoyT,by.x="Code",by.y="IdExtrap")
205 SummPartTron=SummPartTron[order(TriGroupe,decreasing=T),]
206
207 ListSession=levels(as.factor(IdC2M$Session))
208 brks <- quantile(ActMoyTA$x, probs = seq(.05, .95, .05), na.rm = TRUE)-1
209 clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>%
210 {paste0("rgb(255,", ., ",", ., ")")}
211
212
213 #to do: extend colors to other columns to improve readability
214 SummHTMLT=datatable(SummPartTron, rownames = FALSE) %>%
215 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
216 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
217 formatStyle(columns=ListSession, backgroundColor = styleInterval(brks, clrs))
218
219 saveWidget(SummHTMLT,"output-detailRP.html")
220 write.table(SummPartTron,"output-detailRP.tabular",row.names=F,sep="\t",quote=FALSE)