# HG changeset patch
# User ecology
# Date 1556295442 14400
# Node ID be4e28da3919fcc3d0ece74b646145c73b987907
# Parent c55e09a8b4c880515cd14181faf1b5b8672cc33a
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tools/vigiechiro commit 7ef0e58cbcbf41088e359f00b6c86504c773c271
diff -r c55e09a8b4c8 -r be4e28da3919 BilanEnrichiRP.R
--- a/BilanEnrichiRP.R Wed Mar 13 11:17:44 2019 -0400
+++ b/BilanEnrichiRP.R Fri Apr 26 12:17:22 2019 -0400
@@ -1,184 +1,192 @@
+#!/usr/bin/env Rscript
+
+args <- commandArgs(trailingOnly = TRUE)
+
suppressMessages(library(data.table))
suppressMessages(library(DT))
suppressMessages(library(htmlwidgets))
-args <- commandArgs(trailingOnly = TRUE)
-EchelleErreur=c("NA","POSSIBLE","PROBABLE","SUR")
+EchelleErreur=c("","POSSIBLE","PROBABLE","SUR")
EchelleNumErreur=c(99,50,10,1)
-
-IdC2=fread(args[1])
-refRP=fread(args[2])
-GroupList=fread(args[3])
+IdC2=fread(args[1],encoding="UTF-8")
if(substr(IdC2$`nom du fichier`[1],2,2)!="i")
{
- stop("Protocole non conforme, ce script doit etre lance pour un protocole Routier ou Pedestre",call.=FALSE)
-}
-
-Routier=grepl("-",substr(IdC2$`nom du fichier`[1],4,7))
-#compute error risk by species (minimum error among files)
-#to be replaced by glm outputs if I'll have time
-RisqueErreurT=aggregate(IdC2$IdProb,by=list(IdC2$IdExtrap),FUN=function(x) ceiling((1-max(x-0.0001))*100))
-#barplot(RisqueErreurT$x,names.arg=RisqueErreurT$Group.1,las=2)
-#compute error risk accoring to observer/validator (a little dirty because it relies on alphabetical order of confidence classes: POSSIBLE < PROBABLE < SUR)
-RisqueErreurOV0=match(IdC2$ConfV,EchelleErreur)
-RisqueErreurOV=aggregate(RisqueErreurOV0,by=list(IdC2$IdExtrap)
- ,FUN=max)
-RisqueErreurOV2=EchelleNumErreur[RisqueErreurOV$x]
-#compute minimum error risk between man and machine
-RisqueErreur=pmin(RisqueErreurT$x,RisqueErreurOV2,na.rm=TRUE)
-
-#compute number of files validated per species
-FichValid=aggregate(IdC2$IdV,by=list(IdC2$IdExtrap,IdC2$'nom du fichier')
- ,FUN=function(x) sum(!is.na(x)))
-NbValid2=aggregate(FichValid$x,by=list(FichValid$Group.1),FUN=function(x) sum(x>0))
-
-DiffC50=vector() # to store the median of confidence difference between unvalidated records and validated ones
-DiffT50=vector() # to store the median of time difference between unvalidated records and validated ones
-for (j in 1:nlevels(as.factor(IdC2$IdExtrap)))
-{
- IdSp=subset(IdC2
- ,IdC2$IdExtrap==levels(as.factor(IdC2$IdExtrap))[j])
- IdSp$IdProb[is.na(IdSp$IdProb)]=0
- IdSp=IdSp[order(IdSp$IdProb),]
- IdSpV=subset(IdSp,!is.na(IdSp$IdV))
- if(nrow(IdSpV)>0)
- {
- cuts <- c(-Inf, IdSpV$IdProb[-1]-diff(IdSpV$IdProb)/2, Inf)
- CorrC=findInterval(IdSp$IdProb, cuts)
- CorrC2=IdSpV$IdProb[CorrC]
- DiffC=abs(IdSp$IdProb-CorrC2)
- DiffC50=c(DiffC50,median(DiffC))
-
- IdSp=IdSp[order(IdSp$TimeNum),]
- IdSpV=subset(IdSp,!is.na(IdSp$IdV))
- cuts <- c(-Inf, IdSpV$TimeNum[-1]-diff(IdSpV$TimeNum)/2, Inf)
- CorrT=findInterval(IdSp$TimeNum, cuts)
- CorrT2=IdSpV$TimeNum[CorrT]
- DiffT=abs(IdSp$TimeNum-CorrT2)
- DiffT50=c(DiffT50,median(DiffT))
- }else{
- DiffC50=c(DiffC50,Inf)
- DiffT50=c(DiffT50,Inf)
- }
-}
-#compute an index of validation effort per species
-EffortV=1/DiffC50/DiffT50
-EffortClass=(EffortV>0.0005)+(EffortV>0.005)+RisqueErreurOV$x
-#cbind(RisqueErreurOV,EffortV,DiffC50,DiffT50)
-#barplot(EffortClass-1,names.arg=NbValid2$Group.1,las=2)
-ClassEffortV=c("-","FAIBLE","SUFFISANT","SUFFISANT","FORT","FORT")
-EffortClassMot=ClassEffortV[EffortClass]
-
-
-#compare activity / reference frame
-FileInfo=as.data.table(tstrsplit(IdC2$`nom du fichier`,"-"))
-IdC2$Tron=FileInfo$V4
-
-MicTempsInfo=as.data.table(tstrsplit(as.data.frame(FileInfo)[,(ncol(FileInfo))],"_"))
-MicDroit=(as.data.frame(MicTempsInfo)[,(ncol(MicTempsInfo)-2)]=="1")
-IdC2$MicDroit=MicDroit
-
-testTempsFin=aggregate(IdC2$temps_fin,by=list(MicDroit),FUN=max)
-testTempsFin$Direct=(testTempsFin$x>0.5)
-testTF2=sum((testTempsFin$x>0.5))
-if(testTF2>1){stop("Probleme stereo : les 2 canaux semblent etre en enregistrement direct")}
-IdC2M=merge(IdC2,testTempsFin,by.x="MicDroit",by.y="Group.1")
+# print("Protocole non conforme, ce script doit etre lance uniquement pour un protocole Routier ou Pedestre")
+ print("Wrong protocol, please only use this tool for a \'Pedestre\' or \'Routier\' protocol.")
+}else{
-ActMoy=aggregate(IdC2$`nom du fichier`
- ,by=list(IdC2M$IdExtrap,IdC2M$Direct),FUN=length)
-ListSpref=match(levels(as.factor(ActMoy$Group.1)),refRP$Espece)
-Subref=refRP[ListSpref]
-if(Routier)
-{
- Subref=Subref[,c(1:17)]
-}else{
- Subref=Subref[,c(1,18:33)]
-}
-QualifActE=vector()
-QualifActD=vector()
-
-for (k in 1:nlevels(as.factor(ActMoy$Group.1)))
-{
- Actsub=subset(ActMoy,ActMoy$Group.1==levels(as.factor(ActMoy$Group.1))[k])
- if(is.na(Subref[k,2]))
+ refRP=fread(args[2],encoding="UTF-8")
+ GroupList=fread(args[3],encoding="Latin-1")
+
+ IdC2$ConfV[is.na(IdC2$ConfV)]=""
+
+
+ Routier=grepl("-",substr(IdC2$`nom du fichier`[1],4,7))
+ #compute error risk by species (minimum error among files)
+ #to be replaced by glm outputs if I'll have time
+ RisqueErreurT=aggregate(IdC2$IdProb,by=list(IdC2$IdExtrap)
+ ,FUN=function(x) round((1-max(x))*100))
+ barplot(RisqueErreurT$x,names.arg=RisqueErreurT$Group.1,las=2)
+ #compute error risk accoring to observer/validator (a little dirty because it relies on alphabetical order of confidence classes: POSSIBLE < PROBABLE < SUR)
+ RisqueErreurOV0=match(IdC2$ConfV,EchelleErreur)
+ RisqueErreurOV=aggregate(RisqueErreurOV0,by=list(IdC2$IdExtrap)
+ ,FUN=max)
+ RisqueErreurOV2=EchelleNumErreur[RisqueErreurOV$x]
+ #compute minimum error risk between man and machine
+ RisqueErreur=pmin(RisqueErreurT$x,RisqueErreurOV2)
+
+ #compute number of files validated per species
+ FichValid=aggregate(IdC2$IdV,by=list(IdC2$IdExtrap,IdC2$'nom du fichier')
+ ,FUN=function(x) sum(x!=""))
+ NbValid2=aggregate(FichValid$x,by=list(FichValid$Group.1),FUN=function(x) sum(x>0))
+
+ DiffC50=vector() # to store the median of confidence difference between unvalidated records and validated ones
+ DiffT50=vector() # to store the median of time difference between unvalidated records and validated ones
+ for (j in 1:nlevels(as.factor(IdC2$IdExtrap)))
{
- QualifActE=c(QualifActE,NA)
- QualifActD=c(QualifActD,NA)
+ IdSp=subset(IdC2
+ ,IdC2$IdExtrap==levels(as.factor(IdC2$IdExtrap))[j])
+ IdSp$IdProb[is.na(IdSp$IdProb)]=0
+ IdSp=IdSp[order(IdSp$IdProb),]
+ IdSpV=subset(IdSp,IdSp$IdV!="")
+ if(nrow(IdSpV)>0)
+ {
+ cuts <- c(-Inf, IdSpV$IdProb[-1]-diff(IdSpV$IdProb)/2, Inf)
+ CorrC=findInterval(IdSp$IdProb, cuts)
+ CorrC2=IdSpV$IdProb[CorrC]
+ DiffC=abs(IdSp$IdProb-CorrC2)
+ DiffC50=c(DiffC50,median(DiffC))
+
+ IdSp=IdSp[order(IdSp$TimeNum),]
+ IdSpV=subset(IdSp,IdSp$IdV!="")
+ cuts <- c(-Inf, IdSpV$TimeNum[-1]-diff(IdSpV$TimeNum)/2, Inf)
+ CorrT=findInterval(IdSp$TimeNum, cuts)
+ CorrT2=IdSpV$TimeNum[CorrT]
+ DiffT=abs(IdSp$TimeNum-CorrT2)
+ DiffT50=c(DiffT50,median(DiffT))
}else{
- ActE=subset(Actsub,Actsub$Group.2==F)
- if(nrow(ActE)==0)
- {
- QualifActE=c(QualifActE,NA)
-
- }else{
- cuts=cbind(-Inf,as.numeric(Subref[k,6]),as.numeric(Subref[k,7])
- ,as.numeric(Subref[k,8]),Inf)
- QualifActE=c(QualifActE,findInterval(ActE$x,cuts,left.open=T))
- }
- ActD=subset(Actsub,Actsub$Group.2==T)
- if(nrow(ActD)==0)
- {
- QualifActD=c(QualifActD,NA)
-
+ DiffC50=c(DiffC50,Inf)
+ DiffT50=c(DiffT50,Inf)
+ }
+ }
+ #compute an index of validation effort per species
+ EffortV=1/DiffC50/DiffT50
+ EffortClass=(EffortV>0.0005)+(EffortV>0.005)+RisqueErreurOV$x
+ #cbind(RisqueErreurOV,EffortV,DiffC50,DiffT50)
+ barplot(EffortClass-1,names.arg=NbValid2$Group.1,las=2)
+ ClassEffortV=c("-","FAIBLE","SUFFISANT","SUFFISANT","FORT","FORT")
+ EffortClassMot=ClassEffortV[EffortClass]
+
+
+ #compare activity / reference frame
+ FileInfo=as.data.table(tstrsplit(IdC2$`nom du fichier`,"-"))
+ IdC2$Tron=FileInfo$V4
+
+ MicTempsInfo=as.data.table(tstrsplit(as.data.frame(FileInfo)[,(ncol(FileInfo))],"_"))
+ MicDroit=(as.data.frame(MicTempsInfo)[,(ncol(MicTempsInfo)-2)]=="1")
+ IdC2$MicDroit=MicDroit
+
+ testTempsFin=aggregate(IdC2$temps_fin,by=list(MicDroit),FUN=max)
+ testTempsFin$Direct=(testTempsFin$x>0.5)
+ testTF2=sum((testTempsFin$x>0.5))
+ if(testTF2>1){stop("Probleme stereo : les 2 canaux semblent etre en enregistrement direct")}
+ IdC2M=merge(IdC2,testTempsFin,by.x="MicDroit",by.y="Group.1")
+
+ ActMoy=aggregate(IdC2$`nom du fichier`
+ ,by=list(IdC2M$IdExtrap,IdC2M$Direct),FUN=length)
+ ListSpref=match(levels(as.factor(ActMoy$Group.1)),refRP$Espece)
+ Subref=refRP[ListSpref]
+ if(Routier)
+ {
+ Subref=Subref[,c(1:17)]
+ }else{
+ Subref=Subref[,c(1,18:33)]
+ }
+ QualifActE=vector()
+ QualifActD=vector()
+
+ for (k in 1:nlevels(as.factor(ActMoy$Group.1)))
+ {
+ Actsub=subset(ActMoy,ActMoy$Group.1==levels(as.factor(ActMoy$Group.1))[k])
+ if(is.na(Subref[k,2]))
+ {
+ QualifActE=c(QualifActE,NA)
+ QualifActD=c(QualifActD,NA)
}else{
- cuts=cbind(-Inf,as.numeric(Subref[k,14]),as.numeric(Subref[k,15])
- ,as.numeric(Subref[k,16]),Inf)
- QualifActD=c(QualifActD,findInterval(ActD$x,cuts,left.open=T))
- }
-
- }
-}
-ClassAct=c("FAIBLE","MODEREE","FORTE","TRES FORTE")
-QualifActMotE=ClassAct[QualifActE]
-QualifActMotD=ClassAct[QualifActD]
-
-#compute activity by nights (to be completed)
-#ActNuit=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$DateNuit,IdC2M$IdExtrap),FUN=length)
-ActED=dcast(data=ActMoy,formula=Group.1~Group.2,value=x)
-ActED[is.na(ActED)]=0
-#organize the csv summary
-SummPart0=cbind(Esp=levels(as.factor(IdC2M$IdExtrap))
- ,RisqueErreur,NbValid=NbValid2$x,EffortValid=EffortClassMot)
-
-test=match("FALSE",colnames(ActED))
-if(is.na(test)==F)
-{
- SummPart0=cbind(SummPart0,Contacts_Expansion=ActED$'FALSE'
- ,Niveau_Activite_Expansion=QualifActMotE)
-}else{
- SummPart0=cbind(SummPart0,Contacts_Expansion=""
- ,Niveau_Activite_Expansion="")
-}
-test=match("TRUE",colnames(ActED))
-if(is.na(test)==F)
-{
+ ActE=subset(Actsub,Actsub$Group.2==F)
+ if(nrow(ActE)==0)
+ {
+ QualifActE=c(QualifActE,NA)
+
+ }else{
+ cuts=cbind(-Inf,as.numeric(Subref[k,6]),as.numeric(Subref[k,7])
+ ,as.numeric(Subref[k,8]),Inf)
+ QualifActE=c(QualifActE,findInterval(ActE$x,cuts,left.open=T))
+ }
+ ActD=subset(Actsub,Actsub$Group.2==T)
+ if(nrow(ActD)==0)
+ {
+ QualifActD=c(QualifActD,NA)
+
+ }else{
+ cuts=cbind(-Inf,as.numeric(Subref[k,14]),as.numeric(Subref[k,15])
+ ,as.numeric(Subref[k,16]),Inf)
+ QualifActD=c(QualifActD,findInterval(ActD$x,cuts,left.open=T))
+ }
+
+ }
+ }
+ ClassAct=c("FAIBLE","MODEREE","FORTE","TRES FORTE")
+ QualifActMotE=ClassAct[QualifActE]
+ QualifActMotD=ClassAct[QualifActD]
- SummPart0=cbind(SummPart0,Contacts_Direct=ActED$'TRUE'
- ,Niveau_Activite_Direct=QualifActMotD)
-}else{
- SummPart0=cbind(SummPart0,Contacts_Direct=""
- ,Niveau_Activite_Direct="")
-}
-
-InfoSp=c("GroupFR","NomFR","Scientific name","Esp")
-GroupShort=GroupList[,..InfoSp]
-SummPart=merge(GroupShort,SummPart0,by="Esp")
-IndexGroupe=c("Autre","Sauterelle","Chauve-souris")
-SummPart$IndexSumm=match(SummPart$GroupFR,IndexGroupe)
-SummPart=SummPart[with(SummPart
- ,order(IndexSumm,as.numeric(Contacts_Direct),as.numeric(Contacts_Expansion),decreasing=T)),]
-colnames(SummPart)=c("Code","Groupe","Nom francais","Nom scientifique"
+ #compute activity by nights (to be completed)
+ #ActNuit=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$DateNuit,IdC2M$IdExtrap),FUN=length)
+ ActED=dcast(data=ActMoy,formula=Group.1~Group.2,value=x)
+ ActED[is.na(ActED)]=0
+ #organize the csv summary
+ SummPart0=cbind(Esp=levels(as.factor(IdC2M$IdExtrap))
+ ,RisqueErreur,NbValid=NbValid2$x,EffortValid=EffortClassMot)
+
+ test=match("FALSE",colnames(ActED))
+ if(is.na(test)==F)
+ {
+ SummPart0=cbind(SummPart0,Contacts_Expansion=ActED$'FALSE'
+ ,Niveau_Activite_Expansion=QualifActMotE)
+ }else{
+ SummPart0=cbind(SummPart0,Contacts_Expansion=""
+ ,Niveau_Activite_Expansion="")
+ }
+ test=match("TRUE",colnames(ActED))
+ if(is.na(test)==F)
+ {
+
+ SummPart0=cbind(SummPart0,Contacts_Direct=ActED$'TRUE'
+ ,Niveau_Activite_Direct=QualifActMotD)
+ }else{
+ SummPart0=cbind(SummPart0,Contacts_Direct=""
+ ,Niveau_Activite_Direct="")
+ }
+
+ InfoSp=c("GroupFR","NomFR","Scientific name","Esp")
+ GroupShort=GroupList[,..InfoSp]
+ SummPart=merge(GroupShort,SummPart0,by="Esp")
+ IndexGroupe=c("Autre","Sauterelle","Chauve-souris")
+ SummPart$IndexSumm=match(SummPart$GroupFR,IndexGroupe)
+ SummPart=SummPart[with(SummPart
+ ,order(IndexSumm,as.numeric(Contacts_Direct),as.numeric(Contacts_Expansion),decreasing=T)),]
+ colnames(SummPart)=c("Code","Groupe","Nom francais","Nom scientifique"
,"Risque d'erreur (%)","Nb Validations"
,"Effort de validation","Nb de Contacts en expansion"
,"Niveau d'Activite en expansion"
,"Nb de Contacts en direct"
,"Niveau d'Activite en direct","TriGroupe")
-#to do: extend colors to other columns to improve readability
-SummHTML=datatable(SummPart, rownames = FALSE) %>%
+ #to do: extend colors to other columns to improve readability
+ SummHTML=datatable(SummPart, rownames = FALSE) %>%
formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
formatStyle(columns = "Effort de validation",
@@ -189,32 +197,35 @@
background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen")))
-saveWidget(SummHTML,"output-summaryRP.html")
-write.table(SummPart,"output-summaryRP.tabular",row.names=F,sep="\t",quote=FALSE)
-
-#summary for each point/transect
-
-#compute number of files validated per species
-IdC2M$Canal=sapply(IdC2M$Direct,FUN=function(x) if(x){"Direct"}else{"Expansion"})
-
-ActMoyTA=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$IdExtrap,IdC2M$Canal,IdC2M$Session),FUN=length)
-ActMoyT=dcast(data=IdC2M,formula=IdExtrap+Canal~Session
- ,fun.aggregate=length)
-SummPartshort=cbind(SummPart[,c(1:5)],TriGroupe=SummPart[,TriGroupe])
-SummPartTron=merge(SummPartshort,ActMoyT,by.x="Code",by.y="IdExtrap")
-SummPartTron=SummPartTron[order(TriGroupe,decreasing=T),]
-
-ListSession=levels(as.factor(IdC2M$Session))
-brks <- quantile(ActMoyTA$x, probs = seq(.05, .95, .05), na.rm = TRUE)-1
-clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>%
-{paste0("rgb(255,", ., ",", ., ")")}
-
-
-#to do: extend colors to other columns to improve readability
-SummHTMLT=datatable(SummPartTron, rownames = FALSE) %>%
+ saveWidget(SummHTML,"output-summaryRP.html")
+ write.table(SummPart,"output-summaryRP.tabular",row.names=F,sep="\t")
+ #write.csv2(SummPart,"output-summaryRP.tabular",row.names=F) #for testing
+
+ #summary for each point/transect
+
+ #compute number of files validated per species
+ IdC2M$Canal=sapply(IdC2M$Direct,FUN=function(x) if(x){"Direct"}else{"Expansion"})
+
+ ActMoyTA=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$IdExtrap,IdC2M$Canal,IdC2M$Session),FUN=length)
+ ActMoyT=dcast(data=IdC2M,formula=IdExtrap+Canal~Session
+ ,fun.aggregate=length)
+ SummPartshort=cbind(SummPart[,c(1:5)],TriGroupe=SummPart[,TriGroupe])
+ SummPartTron=merge(SummPartshort,ActMoyT,by.x="Code",by.y="IdExtrap")
+ SummPartTron=SummPartTron[order(TriGroupe,decreasing=T),]
+
+ ListSession=levels(as.factor(IdC2M$Session))
+ brks <- quantile(ActMoyTA$x, probs = seq(.05, .95, .05), na.rm = TRUE)-1
+ clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>%
+ {paste0("rgb(255,", ., ",", ., ")")}
+
+
+ #to do: extend colors to other columns to improve readability
+ SummHTMLT=datatable(SummPartTron, rownames = FALSE) %>%
formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
formatStyle(columns=ListSession, backgroundColor = styleInterval(brks, clrs))
-saveWidget(SummHTMLT,"output-detailRP.html")
-write.table(SummPartTron,"output-detailRP.tabular",row.names=F,sep="\t",quote=FALSE)
+ saveWidget(SummHTMLT,"output-detailRP.html")
+ write.table(SummPartTron,"output-detailRP.tabular",row.names=F,sep="\t")
+# write.csv2(SummPartTron,"output-detailRP.tabular",row.names=F)#for testing
+}
diff -r c55e09a8b4c8 -r be4e28da3919 BilanEnrichiRPen.xml
--- a/BilanEnrichiRPen.xml Wed Mar 13 11:17:44 2019 -0400
+++ b/BilanEnrichiRPen.xml Fri Apr 26 12:17:22 2019 -0400
@@ -2,8 +2,10 @@
from Animal Detection on Acoustic Recordings
vigiechiro_macros.xml
-
-
+
+
+
+
- 0.1.0
+ 0.1.1
@@ -28,13 +28,16 @@
10.5334/jors.154
-
+
- r-reshape2
- r-data.table
- r-dt
- r-htmlwidgets
- pandoc
-
+ r-data.table
+
+
+
+
+ r-reshape2
+ r-dt
+ r-htmlwidgets
+ pandoc