0
|
1 library(data.table)
|
|
2
|
|
3 ValidHier=function(x,y) #used to write validator id over observer id
|
|
4 {
|
|
5 if(y==""){x}else{y}
|
|
6 }
|
|
7
|
|
8 f2p <- function(x) #get date-time data from recording file names
|
|
9 {
|
|
10 if (is.data.frame((x)[1])) {pretemps <- vector(length = nrow(x))}
|
|
11 op <- options(digits.secs = 3)
|
|
12 pretemps <- paste(substr(x, nchar(x) - 18, nchar(x)-4), ".", substr(x, nchar(x) - 2, nchar(x)), sep = "")
|
|
13 strptime(pretemps, "%Y%m%d_%H%M%OS",tz="UTC")-7200
|
|
14 }
|
|
15
|
|
16 args <- commandArgs(trailingOnly = TRUE)
|
|
17
|
|
18
|
|
19 #print(args)
|
|
20
|
|
21
|
|
22 #for test
|
|
23 #inputest=list.files("C:/Users/Yves Bas/Documents/GitHub/65MO_Galaxy-E/raw_scripts/Vigie-Chiro/output_IdCorrect_2ndLayer_input_IdValid/",full.names=T)
|
|
24 #for (i in 1:length(inputest))
|
|
25 #{
|
|
26 #args=c(inputest[i],"Referentiel_seuils_C2.csv")
|
|
27 #args=c("5857d56d9ebce1000ed89ea7-DataCorrC2.csv","Referentiel_seuils_C2.csv")
|
|
28
|
|
29
|
|
30
|
|
31 IdCorrect=fread(args[1])
|
|
32 RefSeuil=fread(args[2])
|
|
33 #IdV=as.data.frame(subset(IdCorrect,select=observateur_taxon:validateur_probabilite))
|
|
34
|
|
35 #Step 0 :compute id score from 2nd Layer
|
|
36 test=match("participation",names(IdCorrect))
|
|
37 IdCorrect$IdScore=apply(as.data.frame(IdCorrect)[,(test+1):(ncol(IdCorrect)-1)],MARGIN=1,FUN=max)
|
|
38 #compute true success probabilities according to logistic regression issued from "Referentiel_seuils"
|
|
39 CorrSp=match(IdCorrect$ProbEsp_C2bs,RefSeuil$Espece)
|
|
40 PSp=RefSeuil$Pente[CorrSp]
|
|
41 ISp=RefSeuil$Int[CorrSp]
|
|
42
|
|
43 suppressWarnings(IdCorrect$IdProb<-mapply(FUN=function(w,x,y) if((!is.na(y))&(y>0)&(y<1000)) {(exp(y*w+x)/(1+exp(y*w+x)))}else{w} ,IdCorrect$IdScore,ISp,PSp))
|
|
44
|
|
45
|
|
46
|
|
47
|
|
48 #Step 1 :compute id with confidence regarding a hierarchy (validator > observer)
|
|
49 IdCorrect$IdV=mapply(ValidHier,IdCorrect$observateur_taxon,IdCorrect$validateur_taxon)
|
|
50 IdCorrect$ConfV=mapply(ValidHier,IdCorrect$observateur_probabilite
|
|
51 ,IdCorrect$validateur_probabilite)
|
|
52
|
|
53
|
|
54 #print(paste(args[1],length(subset(IdCorrect$ConfV,IdCorrect$ConfV!=""))))
|
|
55
|
|
56 #Step 2: Get numerictime data
|
|
57 suppressWarnings(IdCorrect$Session<-NULL)
|
|
58 suppressWarnings(IdCorrect$TimeNum<-NULL)
|
|
59
|
|
60 if (substr(IdCorrect$`nom du fichier`[1],2,2)=="i") #for car/walk transects
|
|
61 {
|
|
62 FileInfo=as.data.table(tstrsplit(IdCorrect$`nom du fichier`,"-"))
|
|
63 IdCorrect$Session=as.numeric(substr(FileInfo$V4,5,nchar(FileInfo$V4)))
|
|
64 TimeSec=as.data.table(tstrsplit(FileInfo$V5,"_"))
|
|
65 TimeSec=as.data.frame(TimeSec)
|
|
66 if(sum(TimeSec[,(ncol(TimeSec)-1)]!="00000")==0) #to deal with double Kaleidoscope treatments
|
|
67 {
|
|
68 print("NOMS DE FICHIERS NON CONFORMES")
|
|
69 print("Vous les avez probablement traiter 2 fois par Kaleidoscope")
|
|
70 stop("Merci de nous signaler cette erreur par mail pour correction")
|
|
71 }else{
|
|
72 IdCorrect$TimeNum=(IdCorrect$Session*800
|
|
73 +as.numeric(TimeSec[,(ncol(TimeSec)-1)])
|
|
74 +as.numeric(TimeSec[,(ncol(TimeSec))])/1000)
|
|
75 }
|
|
76
|
|
77 }else{
|
|
78 if(substr(IdCorrect$`nom du fichier`[1],2,2)=="a") #for stationary recordings
|
|
79 {
|
|
80 DateRec=as.POSIXlt(f2p(IdCorrect$`nom du fichier`))
|
|
81 Nuit=format(as.Date(DateRec-43200*(DateRec$hour<12)),format="%d/%m/%Y")
|
|
82 #Nuit[is.na(Nuit)]=0
|
|
83 IdCorrect$Session=Nuit
|
|
84 IdCorrect$TimeNum=as.numeric(DateRec)
|
|
85
|
|
86 }else{
|
|
87 print("NOMS DE FICHIERS NON CONFORMES")
|
|
88 stop("Ils doivent commencer par Cir (routier/pedestre) ou par Car (points fixes")
|
|
89 }
|
|
90 }
|
|
91
|
|
92
|
|
93
|
|
94
|
|
95 #Step 3 :treat sequentially each species identified by Tadarida-C
|
|
96 IdExtrap=vector() #to store the id extrapolated from validations
|
|
97 IdC2=IdCorrect[0,] #to store data in the right order
|
|
98 TypeE=vector() #to store the type of extrapolation made
|
|
99 for (j in 1:nlevels(as.factor(IdCorrect$ProbEsp_C2bs)))
|
|
100 {
|
|
101 IdSp=subset(IdCorrect
|
|
102 ,IdCorrect$ProbEsp_C2bs==levels(as.factor(IdCorrect$ProbEsp_C2bs))[j])
|
|
103 if(sum(IdSp$IdV=="")==(nrow(IdSp))) #case 1 : no validation no change
|
|
104 {
|
|
105 IdC2=rbind(IdC2,IdSp)
|
|
106 IdExtrap=c(IdExtrap,rep(IdSp$ProbEsp_C2bs[1],nrow(IdSp)))
|
|
107 TypeE=c(TypeE,rep(0,nrow(IdSp)))
|
|
108 }else{ #case 2: some validation
|
|
109 Vtemp=subset(IdSp,IdSp$IdV!="")
|
|
110 #case2A: validations are homogeneous
|
|
111 if(nlevels(as.factor(Vtemp$IdV))==1)
|
|
112 {
|
|
113 IdC2=rbind(IdC2,IdSp)
|
|
114 IdExtrap=c(IdExtrap,rep(Vtemp$IdV[1],nrow(IdSp)))
|
|
115 TypeE=c(TypeE,rep(2,nrow(IdSp)))
|
|
116 }else{
|
|
117 #case 2B: validations are heterogeneous
|
|
118 #case 2B1: some validations confirms the species identified by Tadarida and highest confidence are confirmed
|
|
119 subVT=subset(Vtemp,Vtemp$IdV==levels(as.factor(IdCorrect$ProbEsp_C2bs))[j])
|
|
120 subVF=subset(Vtemp,Vtemp$IdV!=levels(as.factor(IdCorrect$ProbEsp_C2bs))[j])
|
|
121 if((nrow(subVT)>0)&(max(subVT$IdProb)>max(subVF$IdProb)))
|
|
122 {
|
|
123 Vtemp=Vtemp[order(Vtemp$IdProb),]
|
|
124 test=(Vtemp$IdV!=Vtemp$ProbEsp_C2bs)
|
|
125 Fr1=max(which(test == TRUE)) #find the error with highest indices
|
|
126 Thr1=mean(Vtemp$IdProb[(Fr1):(Fr1+1)]) #define first threshold as the median confidence between the first error and the confirmed ID right over it
|
|
127 #id over this threshold are considered right
|
|
128 IdHC=subset(IdSp,IdSp$IdProb>Thr1)
|
|
129 IdC2=rbind(IdC2,IdHC)
|
|
130 IdExtrap=c(IdExtrap,rep(Vtemp$IdV[nrow(Vtemp)],nrow(IdHC)))
|
|
131 TypeE=c(TypeE,rep(2,nrow(IdHC)))
|
|
132 #id under this threshold are attributed to validated id closest in time
|
|
133 Vtemp=Vtemp[order(Vtemp$TimeNum),]
|
|
134 cuts <- c(-Inf, Vtemp$TimeNum[-1]-diff(Vtemp$TimeNum)/2, Inf)
|
|
135 CorrV=findInterval(IdSp$TimeNum, cuts)
|
|
136 IdE=Vtemp$IdV[CorrV]
|
|
137 IdEL=subset(IdE,IdSp$IdProb<=Thr1)
|
|
138 IdLC=subset(IdSp,IdSp$IdProb<=Thr1)
|
|
139 IdExtrap=c(IdExtrap,IdEL)
|
|
140 TypeE=c(TypeE,rep(1,length(IdEL)))
|
|
141 IdC2=rbind(IdC2,IdLC)
|
|
142
|
|
143
|
|
144 }else{
|
|
145 #case 2B2: all validations concerns errors
|
|
146 #id are extrapolated on time only
|
|
147 Vtemp=Vtemp[order(Vtemp$TimeNum),]
|
|
148 cuts <- c(-Inf, Vtemp$TimeNum[-1]-diff(Vtemp$TimeNum)/2, Inf)
|
|
149 CorrV=findInterval(IdSp$TimeNum, cuts)
|
|
150 IdE=Vtemp$IdV[CorrV]
|
|
151 IdExtrap=c(IdExtrap,IdE)
|
|
152 TypeE=c(TypeE,rep(1,length(IdE)))
|
|
153 IdC2=rbind(IdC2,IdSp)
|
|
154 }
|
|
155 }
|
|
156
|
|
157
|
|
158 }
|
|
159
|
|
160
|
|
161 }
|
|
162 test1=(nrow(IdC2)==length(IdExtrap))
|
|
163 test2=(nrow(IdC2)==nrow(IdCorrect))
|
|
164 if((test1==F)|(test2==F))
|
|
165 {
|
|
166 (stop("Erreur de traitement !!!"))
|
|
167 }
|
|
168
|
|
169 IdC2$IdExtrap=IdExtrap
|
|
170 IdC2$TypeE=TypeE
|
|
171
|
|
172
|
|
173 IdC2=IdC2[order(IdC2$IdProb,decreasing=T),]
|
|
174 IdC2=IdC2[order(IdC2$ConfV,decreasing=T),]
|
|
175 IdC2=IdC2[order(IdC2$`nom du fichier`),]
|
|
176 #discard duplicated species within the same files (= false positives corrected by 2nd layer)
|
|
177 IdC2=unique(IdC2,by=c("nom du fichier","IdExtrap"))
|
|
178
|
|
179 write.table(IdC2,"output.tabular",row.names=F,sep="\t")
|
|
180 #write.table(IdC2,paste0(substr(args[1],1,nchar(args[1])-15),"-IdC2.csv"),row.names=F,sep="\t")
|