# HG changeset patch # User ecology # Date 1556295442 14400 # Node ID be4e28da3919fcc3d0ece74b646145c73b987907 # Parent c55e09a8b4c880515cd14181faf1b5b8672cc33a planemo upload for repository https://github.com/galaxyecology/tools-ecology/tools/vigiechiro commit 7ef0e58cbcbf41088e359f00b6c86504c773c271 diff -r c55e09a8b4c8 -r be4e28da3919 BilanEnrichiRP.R --- a/BilanEnrichiRP.R Wed Mar 13 11:17:44 2019 -0400 +++ b/BilanEnrichiRP.R Fri Apr 26 12:17:22 2019 -0400 @@ -1,184 +1,192 @@ +#!/usr/bin/env Rscript + +args <- commandArgs(trailingOnly = TRUE) + suppressMessages(library(data.table)) suppressMessages(library(DT)) suppressMessages(library(htmlwidgets)) -args <- commandArgs(trailingOnly = TRUE) -EchelleErreur=c("NA","POSSIBLE","PROBABLE","SUR") +EchelleErreur=c("","POSSIBLE","PROBABLE","SUR") EchelleNumErreur=c(99,50,10,1) - -IdC2=fread(args[1]) -refRP=fread(args[2]) -GroupList=fread(args[3]) +IdC2=fread(args[1],encoding="UTF-8") if(substr(IdC2$`nom du fichier`[1],2,2)!="i") { - stop("Protocole non conforme, ce script doit etre lance pour un protocole Routier ou Pedestre",call.=FALSE) -} - -Routier=grepl("-",substr(IdC2$`nom du fichier`[1],4,7)) -#compute error risk by species (minimum error among files) -#to be replaced by glm outputs if I'll have time -RisqueErreurT=aggregate(IdC2$IdProb,by=list(IdC2$IdExtrap),FUN=function(x) ceiling((1-max(x-0.0001))*100)) -#barplot(RisqueErreurT$x,names.arg=RisqueErreurT$Group.1,las=2) -#compute error risk accoring to observer/validator (a little dirty because it relies on alphabetical order of confidence classes: POSSIBLE < PROBABLE < SUR) -RisqueErreurOV0=match(IdC2$ConfV,EchelleErreur) -RisqueErreurOV=aggregate(RisqueErreurOV0,by=list(IdC2$IdExtrap) - ,FUN=max) -RisqueErreurOV2=EchelleNumErreur[RisqueErreurOV$x] -#compute minimum error risk between man and machine -RisqueErreur=pmin(RisqueErreurT$x,RisqueErreurOV2,na.rm=TRUE) - -#compute number of files validated per species -FichValid=aggregate(IdC2$IdV,by=list(IdC2$IdExtrap,IdC2$'nom du fichier') - ,FUN=function(x) sum(!is.na(x))) -NbValid2=aggregate(FichValid$x,by=list(FichValid$Group.1),FUN=function(x) sum(x>0)) - -DiffC50=vector() # to store the median of confidence difference between unvalidated records and validated ones -DiffT50=vector() # to store the median of time difference between unvalidated records and validated ones -for (j in 1:nlevels(as.factor(IdC2$IdExtrap))) -{ - IdSp=subset(IdC2 - ,IdC2$IdExtrap==levels(as.factor(IdC2$IdExtrap))[j]) - IdSp$IdProb[is.na(IdSp$IdProb)]=0 - IdSp=IdSp[order(IdSp$IdProb),] - IdSpV=subset(IdSp,!is.na(IdSp$IdV)) - if(nrow(IdSpV)>0) - { - cuts <- c(-Inf, IdSpV$IdProb[-1]-diff(IdSpV$IdProb)/2, Inf) - CorrC=findInterval(IdSp$IdProb, cuts) - CorrC2=IdSpV$IdProb[CorrC] - DiffC=abs(IdSp$IdProb-CorrC2) - DiffC50=c(DiffC50,median(DiffC)) - - IdSp=IdSp[order(IdSp$TimeNum),] - IdSpV=subset(IdSp,!is.na(IdSp$IdV)) - cuts <- c(-Inf, IdSpV$TimeNum[-1]-diff(IdSpV$TimeNum)/2, Inf) - CorrT=findInterval(IdSp$TimeNum, cuts) - CorrT2=IdSpV$TimeNum[CorrT] - DiffT=abs(IdSp$TimeNum-CorrT2) - DiffT50=c(DiffT50,median(DiffT)) - }else{ - DiffC50=c(DiffC50,Inf) - DiffT50=c(DiffT50,Inf) - } -} -#compute an index of validation effort per species -EffortV=1/DiffC50/DiffT50 -EffortClass=(EffortV>0.0005)+(EffortV>0.005)+RisqueErreurOV$x -#cbind(RisqueErreurOV,EffortV,DiffC50,DiffT50) -#barplot(EffortClass-1,names.arg=NbValid2$Group.1,las=2) -ClassEffortV=c("-","FAIBLE","SUFFISANT","SUFFISANT","FORT","FORT") -EffortClassMot=ClassEffortV[EffortClass] - - -#compare activity / reference frame -FileInfo=as.data.table(tstrsplit(IdC2$`nom du fichier`,"-")) -IdC2$Tron=FileInfo$V4 - -MicTempsInfo=as.data.table(tstrsplit(as.data.frame(FileInfo)[,(ncol(FileInfo))],"_")) -MicDroit=(as.data.frame(MicTempsInfo)[,(ncol(MicTempsInfo)-2)]=="1") -IdC2$MicDroit=MicDroit - -testTempsFin=aggregate(IdC2$temps_fin,by=list(MicDroit),FUN=max) -testTempsFin$Direct=(testTempsFin$x>0.5) -testTF2=sum((testTempsFin$x>0.5)) -if(testTF2>1){stop("Probleme stereo : les 2 canaux semblent etre en enregistrement direct")} -IdC2M=merge(IdC2,testTempsFin,by.x="MicDroit",by.y="Group.1") +# print("Protocole non conforme, ce script doit etre lance uniquement pour un protocole Routier ou Pedestre") + print("Wrong protocol, please only use this tool for a \'Pedestre\' or \'Routier\' protocol.") +}else{ -ActMoy=aggregate(IdC2$`nom du fichier` - ,by=list(IdC2M$IdExtrap,IdC2M$Direct),FUN=length) -ListSpref=match(levels(as.factor(ActMoy$Group.1)),refRP$Espece) -Subref=refRP[ListSpref] -if(Routier) -{ - Subref=Subref[,c(1:17)] -}else{ - Subref=Subref[,c(1,18:33)] -} -QualifActE=vector() -QualifActD=vector() - -for (k in 1:nlevels(as.factor(ActMoy$Group.1))) -{ - Actsub=subset(ActMoy,ActMoy$Group.1==levels(as.factor(ActMoy$Group.1))[k]) - if(is.na(Subref[k,2])) + refRP=fread(args[2],encoding="UTF-8") + GroupList=fread(args[3],encoding="Latin-1") + + IdC2$ConfV[is.na(IdC2$ConfV)]="" + + + Routier=grepl("-",substr(IdC2$`nom du fichier`[1],4,7)) + #compute error risk by species (minimum error among files) + #to be replaced by glm outputs if I'll have time + RisqueErreurT=aggregate(IdC2$IdProb,by=list(IdC2$IdExtrap) + ,FUN=function(x) round((1-max(x))*100)) + barplot(RisqueErreurT$x,names.arg=RisqueErreurT$Group.1,las=2) + #compute error risk accoring to observer/validator (a little dirty because it relies on alphabetical order of confidence classes: POSSIBLE < PROBABLE < SUR) + RisqueErreurOV0=match(IdC2$ConfV,EchelleErreur) + RisqueErreurOV=aggregate(RisqueErreurOV0,by=list(IdC2$IdExtrap) + ,FUN=max) + RisqueErreurOV2=EchelleNumErreur[RisqueErreurOV$x] + #compute minimum error risk between man and machine + RisqueErreur=pmin(RisqueErreurT$x,RisqueErreurOV2) + + #compute number of files validated per species + FichValid=aggregate(IdC2$IdV,by=list(IdC2$IdExtrap,IdC2$'nom du fichier') + ,FUN=function(x) sum(x!="")) + NbValid2=aggregate(FichValid$x,by=list(FichValid$Group.1),FUN=function(x) sum(x>0)) + + DiffC50=vector() # to store the median of confidence difference between unvalidated records and validated ones + DiffT50=vector() # to store the median of time difference between unvalidated records and validated ones + for (j in 1:nlevels(as.factor(IdC2$IdExtrap))) { - QualifActE=c(QualifActE,NA) - QualifActD=c(QualifActD,NA) + IdSp=subset(IdC2 + ,IdC2$IdExtrap==levels(as.factor(IdC2$IdExtrap))[j]) + IdSp$IdProb[is.na(IdSp$IdProb)]=0 + IdSp=IdSp[order(IdSp$IdProb),] + IdSpV=subset(IdSp,IdSp$IdV!="") + if(nrow(IdSpV)>0) + { + cuts <- c(-Inf, IdSpV$IdProb[-1]-diff(IdSpV$IdProb)/2, Inf) + CorrC=findInterval(IdSp$IdProb, cuts) + CorrC2=IdSpV$IdProb[CorrC] + DiffC=abs(IdSp$IdProb-CorrC2) + DiffC50=c(DiffC50,median(DiffC)) + + IdSp=IdSp[order(IdSp$TimeNum),] + IdSpV=subset(IdSp,IdSp$IdV!="") + cuts <- c(-Inf, IdSpV$TimeNum[-1]-diff(IdSpV$TimeNum)/2, Inf) + CorrT=findInterval(IdSp$TimeNum, cuts) + CorrT2=IdSpV$TimeNum[CorrT] + DiffT=abs(IdSp$TimeNum-CorrT2) + DiffT50=c(DiffT50,median(DiffT)) }else{ - ActE=subset(Actsub,Actsub$Group.2==F) - if(nrow(ActE)==0) - { - QualifActE=c(QualifActE,NA) - - }else{ - cuts=cbind(-Inf,as.numeric(Subref[k,6]),as.numeric(Subref[k,7]) - ,as.numeric(Subref[k,8]),Inf) - QualifActE=c(QualifActE,findInterval(ActE$x,cuts,left.open=T)) - } - ActD=subset(Actsub,Actsub$Group.2==T) - if(nrow(ActD)==0) - { - QualifActD=c(QualifActD,NA) - + DiffC50=c(DiffC50,Inf) + DiffT50=c(DiffT50,Inf) + } + } + #compute an index of validation effort per species + EffortV=1/DiffC50/DiffT50 + EffortClass=(EffortV>0.0005)+(EffortV>0.005)+RisqueErreurOV$x + #cbind(RisqueErreurOV,EffortV,DiffC50,DiffT50) + barplot(EffortClass-1,names.arg=NbValid2$Group.1,las=2) + ClassEffortV=c("-","FAIBLE","SUFFISANT","SUFFISANT","FORT","FORT") + EffortClassMot=ClassEffortV[EffortClass] + + + #compare activity / reference frame + FileInfo=as.data.table(tstrsplit(IdC2$`nom du fichier`,"-")) + IdC2$Tron=FileInfo$V4 + + MicTempsInfo=as.data.table(tstrsplit(as.data.frame(FileInfo)[,(ncol(FileInfo))],"_")) + MicDroit=(as.data.frame(MicTempsInfo)[,(ncol(MicTempsInfo)-2)]=="1") + IdC2$MicDroit=MicDroit + + testTempsFin=aggregate(IdC2$temps_fin,by=list(MicDroit),FUN=max) + testTempsFin$Direct=(testTempsFin$x>0.5) + testTF2=sum((testTempsFin$x>0.5)) + if(testTF2>1){stop("Probleme stereo : les 2 canaux semblent etre en enregistrement direct")} + IdC2M=merge(IdC2,testTempsFin,by.x="MicDroit",by.y="Group.1") + + ActMoy=aggregate(IdC2$`nom du fichier` + ,by=list(IdC2M$IdExtrap,IdC2M$Direct),FUN=length) + ListSpref=match(levels(as.factor(ActMoy$Group.1)),refRP$Espece) + Subref=refRP[ListSpref] + if(Routier) + { + Subref=Subref[,c(1:17)] + }else{ + Subref=Subref[,c(1,18:33)] + } + QualifActE=vector() + QualifActD=vector() + + for (k in 1:nlevels(as.factor(ActMoy$Group.1))) + { + Actsub=subset(ActMoy,ActMoy$Group.1==levels(as.factor(ActMoy$Group.1))[k]) + if(is.na(Subref[k,2])) + { + QualifActE=c(QualifActE,NA) + QualifActD=c(QualifActD,NA) }else{ - cuts=cbind(-Inf,as.numeric(Subref[k,14]),as.numeric(Subref[k,15]) - ,as.numeric(Subref[k,16]),Inf) - QualifActD=c(QualifActD,findInterval(ActD$x,cuts,left.open=T)) - } - - } -} -ClassAct=c("FAIBLE","MODEREE","FORTE","TRES FORTE") -QualifActMotE=ClassAct[QualifActE] -QualifActMotD=ClassAct[QualifActD] - -#compute activity by nights (to be completed) -#ActNuit=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$DateNuit,IdC2M$IdExtrap),FUN=length) -ActED=dcast(data=ActMoy,formula=Group.1~Group.2,value=x) -ActED[is.na(ActED)]=0 -#organize the csv summary -SummPart0=cbind(Esp=levels(as.factor(IdC2M$IdExtrap)) - ,RisqueErreur,NbValid=NbValid2$x,EffortValid=EffortClassMot) - -test=match("FALSE",colnames(ActED)) -if(is.na(test)==F) -{ - SummPart0=cbind(SummPart0,Contacts_Expansion=ActED$'FALSE' - ,Niveau_Activite_Expansion=QualifActMotE) -}else{ - SummPart0=cbind(SummPart0,Contacts_Expansion="" - ,Niveau_Activite_Expansion="") -} -test=match("TRUE",colnames(ActED)) -if(is.na(test)==F) -{ + ActE=subset(Actsub,Actsub$Group.2==F) + if(nrow(ActE)==0) + { + QualifActE=c(QualifActE,NA) + + }else{ + cuts=cbind(-Inf,as.numeric(Subref[k,6]),as.numeric(Subref[k,7]) + ,as.numeric(Subref[k,8]),Inf) + QualifActE=c(QualifActE,findInterval(ActE$x,cuts,left.open=T)) + } + ActD=subset(Actsub,Actsub$Group.2==T) + if(nrow(ActD)==0) + { + QualifActD=c(QualifActD,NA) + + }else{ + cuts=cbind(-Inf,as.numeric(Subref[k,14]),as.numeric(Subref[k,15]) + ,as.numeric(Subref[k,16]),Inf) + QualifActD=c(QualifActD,findInterval(ActD$x,cuts,left.open=T)) + } + + } + } + ClassAct=c("FAIBLE","MODEREE","FORTE","TRES FORTE") + QualifActMotE=ClassAct[QualifActE] + QualifActMotD=ClassAct[QualifActD] - SummPart0=cbind(SummPart0,Contacts_Direct=ActED$'TRUE' - ,Niveau_Activite_Direct=QualifActMotD) -}else{ - SummPart0=cbind(SummPart0,Contacts_Direct="" - ,Niveau_Activite_Direct="") -} - -InfoSp=c("GroupFR","NomFR","Scientific name","Esp") -GroupShort=GroupList[,..InfoSp] -SummPart=merge(GroupShort,SummPart0,by="Esp") -IndexGroupe=c("Autre","Sauterelle","Chauve-souris") -SummPart$IndexSumm=match(SummPart$GroupFR,IndexGroupe) -SummPart=SummPart[with(SummPart - ,order(IndexSumm,as.numeric(Contacts_Direct),as.numeric(Contacts_Expansion),decreasing=T)),] -colnames(SummPart)=c("Code","Groupe","Nom francais","Nom scientifique" + #compute activity by nights (to be completed) + #ActNuit=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$DateNuit,IdC2M$IdExtrap),FUN=length) + ActED=dcast(data=ActMoy,formula=Group.1~Group.2,value=x) + ActED[is.na(ActED)]=0 + #organize the csv summary + SummPart0=cbind(Esp=levels(as.factor(IdC2M$IdExtrap)) + ,RisqueErreur,NbValid=NbValid2$x,EffortValid=EffortClassMot) + + test=match("FALSE",colnames(ActED)) + if(is.na(test)==F) + { + SummPart0=cbind(SummPart0,Contacts_Expansion=ActED$'FALSE' + ,Niveau_Activite_Expansion=QualifActMotE) + }else{ + SummPart0=cbind(SummPart0,Contacts_Expansion="" + ,Niveau_Activite_Expansion="") + } + test=match("TRUE",colnames(ActED)) + if(is.na(test)==F) + { + + SummPart0=cbind(SummPart0,Contacts_Direct=ActED$'TRUE' + ,Niveau_Activite_Direct=QualifActMotD) + }else{ + SummPart0=cbind(SummPart0,Contacts_Direct="" + ,Niveau_Activite_Direct="") + } + + InfoSp=c("GroupFR","NomFR","Scientific name","Esp") + GroupShort=GroupList[,..InfoSp] + SummPart=merge(GroupShort,SummPart0,by="Esp") + IndexGroupe=c("Autre","Sauterelle","Chauve-souris") + SummPart$IndexSumm=match(SummPart$GroupFR,IndexGroupe) + SummPart=SummPart[with(SummPart + ,order(IndexSumm,as.numeric(Contacts_Direct),as.numeric(Contacts_Expansion),decreasing=T)),] + colnames(SummPart)=c("Code","Groupe","Nom francais","Nom scientifique" ,"Risque d'erreur (%)","Nb Validations" ,"Effort de validation","Nb de Contacts en expansion" ,"Niveau d'Activite en expansion" ,"Nb de Contacts en direct" ,"Niveau d'Activite en direct","TriGroupe") -#to do: extend colors to other columns to improve readability -SummHTML=datatable(SummPart, rownames = FALSE) %>% + #to do: extend colors to other columns to improve readability + SummHTML=datatable(SummPart, rownames = FALSE) %>% formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)", background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>% formatStyle(columns = "Effort de validation", @@ -189,32 +197,35 @@ background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen"))) -saveWidget(SummHTML,"output-summaryRP.html") -write.table(SummPart,"output-summaryRP.tabular",row.names=F,sep="\t",quote=FALSE) - -#summary for each point/transect - -#compute number of files validated per species -IdC2M$Canal=sapply(IdC2M$Direct,FUN=function(x) if(x){"Direct"}else{"Expansion"}) - -ActMoyTA=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$IdExtrap,IdC2M$Canal,IdC2M$Session),FUN=length) -ActMoyT=dcast(data=IdC2M,formula=IdExtrap+Canal~Session - ,fun.aggregate=length) -SummPartshort=cbind(SummPart[,c(1:5)],TriGroupe=SummPart[,TriGroupe]) -SummPartTron=merge(SummPartshort,ActMoyT,by.x="Code",by.y="IdExtrap") -SummPartTron=SummPartTron[order(TriGroupe,decreasing=T),] - -ListSession=levels(as.factor(IdC2M$Session)) -brks <- quantile(ActMoyTA$x, probs = seq(.05, .95, .05), na.rm = TRUE)-1 -clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>% -{paste0("rgb(255,", ., ",", ., ")")} - - -#to do: extend colors to other columns to improve readability -SummHTMLT=datatable(SummPartTron, rownames = FALSE) %>% + saveWidget(SummHTML,"output-summaryRP.html") + write.table(SummPart,"output-summaryRP.tabular",row.names=F,sep="\t") + #write.csv2(SummPart,"output-summaryRP.tabular",row.names=F) #for testing + + #summary for each point/transect + + #compute number of files validated per species + IdC2M$Canal=sapply(IdC2M$Direct,FUN=function(x) if(x){"Direct"}else{"Expansion"}) + + ActMoyTA=aggregate(IdC2M$`nom du fichier`,by=list(IdC2M$IdExtrap,IdC2M$Canal,IdC2M$Session),FUN=length) + ActMoyT=dcast(data=IdC2M,formula=IdExtrap+Canal~Session + ,fun.aggregate=length) + SummPartshort=cbind(SummPart[,c(1:5)],TriGroupe=SummPart[,TriGroupe]) + SummPartTron=merge(SummPartshort,ActMoyT,by.x="Code",by.y="IdExtrap") + SummPartTron=SummPartTron[order(TriGroupe,decreasing=T),] + + ListSession=levels(as.factor(IdC2M$Session)) + brks <- quantile(ActMoyTA$x, probs = seq(.05, .95, .05), na.rm = TRUE)-1 + clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>% + {paste0("rgb(255,", ., ",", ., ")")} + + + #to do: extend colors to other columns to improve readability + SummHTMLT=datatable(SummPartTron, rownames = FALSE) %>% formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)", background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>% formatStyle(columns=ListSession, backgroundColor = styleInterval(brks, clrs)) -saveWidget(SummHTMLT,"output-detailRP.html") -write.table(SummPartTron,"output-detailRP.tabular",row.names=F,sep="\t",quote=FALSE) + saveWidget(SummHTMLT,"output-detailRP.html") + write.table(SummPartTron,"output-detailRP.tabular",row.names=F,sep="\t") +# write.csv2(SummPartTron,"output-detailRP.tabular",row.names=F)#for testing +} diff -r c55e09a8b4c8 -r be4e28da3919 BilanEnrichiRPen.xml --- a/BilanEnrichiRPen.xml Wed Mar 13 11:17:44 2019 -0400 +++ b/BilanEnrichiRPen.xml Fri Apr 26 12:17:22 2019 -0400 @@ -2,8 +2,10 @@ from Animal Detection on Acoustic Recordings vigiechiro_macros.xml - - + + + + - 0.1.0 + 0.1.1 @@ -28,13 +28,16 @@ 10.5334/jors.154 - + - r-reshape2 - r-data.table - r-dt - r-htmlwidgets - pandoc - + r-data.table + + + + + r-reshape2 + r-dt + r-htmlwidgets + pandoc