Mercurial > repos > mnhn65mo > vigiechiro
comparison BilanEnrichiRP.R @ 0:0e3db3a308c0 draft default tip
Uploaded
author | mnhn65mo |
---|---|
date | Mon, 06 Aug 2018 09:13:29 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0e3db3a308c0 |
---|---|
1 library(data.table) | |
2 library(DT) | |
3 library(htmlwidgets) | |
4 | |
5 args <- commandArgs(trailingOnly = TRUE) | |
6 #print(args) | |
7 EchelleErreur=c("","POSSIBLE","PROBABLE","SUR") | |
8 EchelleNumErreur=c(99,50,10,1) | |
9 | |
10 #for test | |
11 #inputest=list.files("C:/Users/Yves Bas/Documents/GitHub/65MO_Galaxy-E/raw_scripts/Vigie-Chiro/output_IdValid_input_BilanEnrichi/",pattern="IdC2.csv",full.names=T) | |
12 #for (i in 1:length(inputest)) | |
13 #{ | |
14 # args=c(inputest[i],"refRPSeuil50.csv","SpeciesList.csv") | |
15 | |
16 IdC2=fread(args[1]) | |
17 refRP=fread(args[2]) | |
18 GroupList=fread(args[3]) | |
19 | |
20 | |
21 if(substr(IdC2$`nom du fichier`[1],2,2)!="i") | |
22 { | |
23 print("Protocole non conforme, ce script doit etre lance pour un protocole Routier ou Pedestre") | |
24 }else{ | |
25 | |
26 Routier=grepl("-",substr(IdC2$`nom du fichier`[1],4,7)) | |
27 #compute error risk by species (minimum error among files) | |
28 #to be replaced by glm outputs if I'll have time | |
29 RisqueErreurT=aggregate(IdC2$IdProb,by=list(IdC2$IdExtrap),FUN=function(x) ceiling((1-max(x-0.0001))*100)) | |
30 barplot(RisqueErreurT$x,names.arg=RisqueErreurT$Group.1,las=2) | |
31 #compute error risk accoring to observer/validator (a little dirty because it relies on alphabetical order of confidence classes: POSSIBLE < PROBABLE < SUR) | |
32 RisqueErreurOV0=match(IdC2$ConfV,EchelleErreur) | |
33 RisqueErreurOV=aggregate(RisqueErreurOV0,by=list(IdC2$IdExtrap) | |
34 ,FUN=max) | |
35 RisqueErreurOV2=EchelleNumErreur[RisqueErreurOV$x] | |
36 #compute minimum error risk between man and machine | |
37 RisqueErreur=pmin(RisqueErreurT$x,RisqueErreurOV2) | |
38 | |
39 #compute number of files validated per species | |
40 FichValid=aggregate(IdC2$IdV,by=list(IdC2$IdExtrap,IdC2$'nom du fichier') | |
41 ,FUN=function(x) sum(x!="")) | |
42 NbValid2=aggregate(FichValid$x,by=list(FichValid$Group.1),FUN=function(x) sum(x>0)) | |
43 | |
44 DiffC50=vector() # to store the median of confidence difference between unvalidated records and validated ones | |
45 DiffT50=vector() # to store the median of time difference between unvalidated records and validated ones | |
46 for (j in 1:nlevels(as.factor(IdC2$IdExtrap))) | |
47 { | |
48 IdSp=subset(IdC2 | |
49 ,IdC2$IdExtrap==levels(as.factor(IdC2$IdExtrap))[j]) | |
50 IdSp$IdProb[is.na(IdSp$IdProb)]=0 | |
51 IdSp=IdSp[order(IdSp$IdProb),] | |
52 IdSpV=subset(IdSp,IdSp$IdV!="") | |
53 if(nrow(IdSpV)>0) | |
54 { | |
55 cuts <- c(-Inf, IdSpV$IdProb[-1]-diff(IdSpV$IdProb)/2, Inf) | |
56 CorrC=findInterval(IdSp$IdProb, cuts) | |
57 CorrC2=IdSpV$IdProb[CorrC] | |
58 DiffC=abs(IdSp$IdProb-CorrC2) | |
59 DiffC50=c(DiffC50,median(DiffC)) | |
60 | |
61 IdSp=IdSp[order(IdSp$TimeNum),] | |
62 IdSpV=subset(IdSp,IdSp$IdV!="") | |
63 cuts <- c(-Inf, IdSpV$TimeNum[-1]-diff(IdSpV$TimeNum)/2, Inf) | |
64 CorrT=findInterval(IdSp$TimeNum, cuts) | |
65 CorrT2=IdSpV$TimeNum[CorrT] | |
66 DiffT=abs(IdSp$TimeNum-CorrT2) | |
67 DiffT50=c(DiffT50,median(DiffT)) | |
68 }else{ | |
69 DiffC50=c(DiffC50,Inf) | |
70 DiffT50=c(DiffT50,Inf) | |
71 } | |
72 } | |
73 #compute an index of validation effort per species | |
74 EffortV=1/DiffC50/DiffT50 | |
75 EffortClass=(EffortV>0.0005)+(EffortV>0.005)+RisqueErreurOV$x | |
76 #cbind(RisqueErreurOV,EffortV,DiffC50,DiffT50) | |
77 barplot(EffortClass-1,names.arg=NbValid2$Group.1,las=2) | |
78 ClassEffortV=c("-","FAIBLE","SUFFISANT","SUFFISANT","FORT","FORT") | |
79 EffortClassMot=ClassEffortV[EffortClass] | |
80 | |
81 | |
82 #compare activity / reference frame | |
83 FileInfo=as.data.table(tstrsplit(IdC2$`nom du fichier`,"-")) | |
84 IdC2$Tron=FileInfo$V4 | |
85 | |
86 MicTempsInfo=as.data.table(tstrsplit(as.data.frame(FileInfo)[,(ncol(FileInfo))],"_")) | |
87 MicDroit=(as.data.frame(MicTempsInfo)[,(ncol(MicTempsInfo)-2)]=="1") | |
88 IdC2$MicDroit=MicDroit | |
89 | |
90 testTempsFin=aggregate(IdC2$temps_fin,by=list(MicDroit),FUN=max) | |
91 testTempsFin$Direct=(testTempsFin$x>0.5) | |
92 testTF2=sum((testTempsFin$x>0.5)) | |
93 if(testTF2>1){stop("Probleme stereo : les 2 canaux semblent etre en enregistrement direct")} | |
94 IdC2M=merge(IdC2,testTempsFin,by.x="MicDroit",by.y="Group.1") | |
95 | |
96 ActMoy=aggregate(IdC2$`nom du fichier` | |
97 ,by=list(IdC2M$IdExtrap,IdC2M$Direct),FUN=length) | |
98 ListSpref=match(levels(as.factor(ActMoy$Group.1)),refRP$Espece) | |
99 Subref=refRP[ListSpref] | |
100 if(Routier) | |
101 { | |
102 Subref=Subref[,c(1:17)] | |
103 }else{ | |
104 Subref=Subref[,c(1,18:33)] | |
105 } | |
106 QualifActE=vector() | |
107 QualifActD=vector() | |
108 | |
109 for (k in 1:nlevels(as.factor(ActMoy$Group.1))) | |
110 { | |
111 Actsub=subset(ActMoy,ActMoy$Group.1==levels(as.factor(ActMoy$Group.1))[k]) | |
112 if(is.na(Subref[k,2])) | |
113 { | |
114 QualifActE=c(QualifActE,NA) | |
115 QualifActD=c(QualifActD,NA) | |
116 }else{ | |
117 ActE=subset(Actsub,Actsub$Group.2==F) | |
118 if(nrow(ActE)==0) | |
119 { | |
120 QualifActE=c(QualifActE,NA) | |
121 | |
122 }else{ | |
123 cuts=cbind(-Inf,as.numeric(Subref[k,6]),as.numeric(Subref[k,7]) | |
124 ,as.numeric(Subref[k,8]),Inf) | |
125 QualifActE=c(QualifActE,findInterval(ActE$x,cuts,left.open=T)) | |
126 } | |
127 ActD=subset(Actsub,Actsub$Group.2==T) | |
128 if(nrow(ActD)==0) | |
129 { | |
130 QualifActD=c(QualifActD,NA) | |
131 | |
132 }else{ | |
133 cuts=cbind(-Inf,as.numeric(Subref[k,14]),as.numeric(Subref[k,15]) | |
134 ,as.numeric(Subref[k,16]),Inf) | |
135 QualifActD=c(QualifActD,findInterval(ActD$x,cuts,left.open=T)) | |
136 } | |
137 | |
138 } | |
139 } | |
140 ClassAct=c("FAIBLE","MODEREE","FORTE","TRES FORTE") | |
141 QualifActMotE=ClassAct[QualifActE] | |
142 QualifActMotD=ClassAct[QualifActD] | |
143 | |
144 #compute activity by nights (to be completed) | |
145 #ActNuit=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$DateNuit,IdC2M$IdExtrap),FUN=length) | |
146 ActED=dcast(data=ActMoy,formula=Group.1~Group.2,value=x) | |
147 ActED[is.na(ActED)]=0 | |
148 #organize the csv summary | |
149 SummPart0=cbind(Esp=levels(as.factor(IdC2M$IdExtrap)) | |
150 ,RisqueErreur,NbValid=NbValid2$x,EffortValid=EffortClassMot) | |
151 | |
152 test=match("FALSE",colnames(ActED)) | |
153 if(is.na(test)==F) | |
154 { | |
155 SummPart0=cbind(SummPart0,Contacts_Expansion=ActED$'FALSE' | |
156 ,Niveau_Activite_Expansion=QualifActMotE) | |
157 }else{ | |
158 SummPart0=cbind(SummPart0,Contacts_Expansion="" | |
159 ,Niveau_Activite_Expansion="") | |
160 } | |
161 test=match("TRUE",colnames(ActED)) | |
162 if(is.na(test)==F) | |
163 { | |
164 | |
165 SummPart0=cbind(SummPart0,Contacts_Direct=ActED$'TRUE' | |
166 ,Niveau_Activite_Direct=QualifActMotD) | |
167 }else{ | |
168 SummPart0=cbind(SummPart0,Contacts_Direct="" | |
169 ,Niveau_Activite_Direct="") | |
170 } | |
171 | |
172 InfoSp=c("GroupFR","NomFR","Scientific name","Esp") | |
173 GroupShort=GroupList[,..InfoSp] | |
174 SummPart=merge(GroupShort,SummPart0,by="Esp") | |
175 IndexGroupe=c("Autre","Sauterelle","Chauve-souris") | |
176 SummPart$IndexSumm=match(SummPart$GroupFR,IndexGroupe) | |
177 SummPart=SummPart[with(SummPart | |
178 ,order(IndexSumm,as.numeric(Contacts_Direct),as.numeric(Contacts_Expansion),decreasing=T)),] | |
179 colnames(SummPart)=c("Code","Groupe","Nom francais","Nom scientifique" | |
180 ,"Risque d'erreur (%)","Nb Validations" | |
181 ,"Effort de validation","Nb de Contacts en expansion" | |
182 ,"Niveau d'Activite en expansion" | |
183 ,"Nb de Contacts en direct" | |
184 ,"Niveau d'Activite en direct","TriGroupe") | |
185 | |
186 #to do: extend colors to other columns to improve readability | |
187 SummHTML=datatable(SummPart, rownames = FALSE) %>% | |
188 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)", | |
189 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>% | |
190 formatStyle(columns = "Effort de validation", | |
191 background = styleEqual(c("-","FAIBLE","SUFFISANT","FORT"), c("white", "cyan", "royalblue", "darkblue"))) %>% | |
192 formatStyle(columns = c("Nb de Contacts en expansion","Niveau d'Activite en expansion"),valueColumns="Niveau d'Activite en expansion", | |
193 background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen"))) %>% | |
194 formatStyle(columns = c("Nb de Contacts en direct","Niveau d'Activite en direct"),valueColumns="Niveau d'Activite en direct", | |
195 background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen"))) | |
196 | |
197 | |
198 saveWidget(SummHTML,"output-summaryRP.html") | |
199 write.table(SummPart,"output-summaryRP.tabular",row.names=F,sep="\t") | |
200 | |
201 #summary for each point/transect | |
202 | |
203 #compute number of files validated per species | |
204 IdC2M$Canal=sapply(IdC2M$Direct,FUN=function(x) if(x){"Direct"}else{"Expansion"}) | |
205 | |
206 ActMoyTA=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$IdExtrap,IdC2M$Canal,IdC2M$Session),FUN=length) | |
207 ActMoyT=dcast(data=IdC2M,formula=IdExtrap+Canal~Session | |
208 ,fun.aggregate=length) | |
209 SummPartshort=cbind(SummPart[,c(1:5)],TriGroupe=SummPart[,TriGroupe]) | |
210 SummPartTron=merge(SummPartshort,ActMoyT,by.x="Code",by.y="IdExtrap") | |
211 SummPartTron=SummPartTron[order(TriGroupe,decreasing=T),] | |
212 | |
213 ListSession=levels(as.factor(IdC2M$Session)) | |
214 brks <- quantile(ActMoyTA$x, probs = seq(.05, .95, .05), na.rm = TRUE)-1 | |
215 clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>% | |
216 {paste0("rgb(255,", ., ",", ., ")")} | |
217 | |
218 | |
219 #to do: extend colors to other columns to improve readability | |
220 SummHTMLT=datatable(SummPartTron, rownames = FALSE) %>% | |
221 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)", | |
222 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>% | |
223 formatStyle(columns=ListSession, backgroundColor = styleInterval(brks, clrs)) | |
224 | |
225 saveWidget(SummHTMLT,"output-detailRP.html") | |
226 write.table(SummPartTron,"output-detailRP.tabular",row.names=F,sep="\t") | |
227 | |
228 #saveWidget(SummHTML,paste0(substr(args[1],1,nchar(args[1])-9),"-summary.html")) | |
229 #write.table(SummPart,paste0(substr(args[1],1,nchar(args[1])-9),"-summary.csv"),row.names=F,sep="\t") | |
230 #saveWidget(SummHTMLT,paste0(substr(args[1],1,nchar(args[1])-9),"-detail.html")) | |
231 #write.table(SummPartTron,paste0(substr(args[1],1,nchar(args[1])-9),"-detail.csv"),row.names=F,sep="\t") | |
232 | |
233 | |
234 | |
235 } | |
236 | |
237 | |
238 |