comparison Moyenne_geom.r @ 0:985f8839aebd draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Geom_mean_workflow commit 3f11e193fd9ba5bf0c706cd5d65d6398166776cb
author ecology
date Sat, 25 Nov 2023 15:18:01 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:985f8839aebd
1 #### loading required R libraries
2 #### chargement des packages R utilisés
3 library(gdata)
4 library(XLConnect)
5 library(rms)
6
7 ###### overall parameters and settings
8 ###### paramètres globaux utilisés
9
10 args = commandArgs(trailingOnly=TRUE)
11 if (length(args)==0)
12 {
13 stop("This tool needs at least one argument")
14 }else{
15 data <- args[1]
16 sep <- args[2]
17 HR <- args[3]
18
19 }
20
21 if (HR =="false"){HR<-FALSE} else {HR<-TRUE}
22
23 ###nrep: number of samples used to calculate geometric means
24 ###nrep: nombre d'échantillons utilisés pour calculer les moyennes géométriques
25 nrep<-10000
26
27 #______________________________________________________________________________________________________________________________________________________________________________________________
28 ###### common functions
29 ###### fonction utiles pour la suite
30
31 convert.to.numeric<-function(x){
32 t(apply(x,1,function(x){as.double(sub(" ","",as.character(x)))}))}
33
34
35 ### calculus of the logarithm of nrep geometric means, sampling based on a lognormal distribution with the same moments as the empirical ones (means & Ics)
36 #to prevent negative values
37 ### calcul du logarithme de nrep moyennes géométriques, l'échantillonnage étant fait avec la distribution lognormale de mêmes moments que les momenst empriques (means et ICs)
38 #pour éviter d'avoir des valeurs négatives
39
40 lgeomean<-function(means,ICs,nrep)
41 {#means: vector: mean estimates for the different categories
42 #ICs: vector: in proportion to the mean, difference between the extremum of the 95% confidence interval and the mean
43 require(mvtnorm)
44 #calculation of the parameters of the log normal distribution (on the log scale)
45 #cf. http://127.0.0.1:26338/library/stats/html/Lognormal.html
46 logsigma<-sqrt(log((ICs/qnorm(0.975)/means)^2+1))
47 logmean<-log(means)-1/2*logsigma^2
48
49 #gaussian sampling on the log scale then taking exponential
50 temp<-exp(rmvnorm(nrep,mean=logmean,sigma=diag(logsigma*logsigma)))
51
52 #taking geometric mean over categories, but kept on the log scale
53 geomm.rep<-apply(temp,1,function(x){(mean(log(x),na.rm=TRUE))})
54 #c(mean(geomm.rep),sd(geomm.rep))
55 geomm.rep}
56 #_______________________________________________________________________________________________________________________________________________________________________________________________
57
58 ###### importation des données
59 ###### importation of data
60 temp<-read.csv(file=data,sep=sep,header=HR,encoding="UTF-8")
61
62 data2008_2012<-temp[4:14,]
63 data2013_2017<-temp[21:31,]
64
65 meandata2008_2012<-convert.to.numeric(data2008_2012[,c(3,6,9)])
66 ICdata2008_2012<-convert.to.numeric(data2008_2012[,c(5,8,11)])
67 meandata2013_2017<-convert.to.numeric(data2013_2017[,c(3,6,9)])
68 ICdata2013_2017<-convert.to.numeric(data2013_2017[,c(5,8,11)])
69
70 ####### code to calculate (nrep) logarithms of geometric means by region (Greco)
71 ####### code pour calculer les nrep logarithmes de moyennes géométriques par région (GRECO)
72
73 set.seed(1)
74 #first period
75 #première période
76 rest2008_2012<-sapply(1:dim(data2008_2012)[1],function(region){lgeomean(meandata2008_2012[region,],ICdata2008_2012[region,],nrep)})
77
78 set.seed(3)
79 #first period but with different seed
80 #première période mais avec une graine différente
81 rest2008_2012_s3<-sapply(1:dim(data2008_2012)[1],function(region){lgeomean(meandata2008_2012[region,],ICdata2008_2012[region,],nrep)})
82
83 set.seed(2)
84 #second period
85 #seconde période
86 rest2013_2017<-sapply(1:dim(data2013_2017)[1],function(region){lgeomean(meandata2013_2017[region,],ICdata2013_2017[region,],nrep)})
87
88
89 ####### code to summarize the above nrep logarithms of geometric means by region into the statistics of an overall geometric mean across regions, taking the first period as reference
90 ###### code pour passer des nrep logarithmes de moyenne géométrique par région aux statistiques de la moyenne géométrique globale, en prennat la première période comme référence
91
92 #for the first period
93 #pour la première période
94 Mean_2008_2012_scaled<-{temp<-apply(rest2008_2012_s3,1,function(x){mean(x)})-apply(rest2008_2012,1,function(x){mean(x)});c(mean(exp(temp)),sd(exp(temp)),quantile(exp(temp),prob=c(0.025,0.975)))}
95
96 #for the second period
97 #pour la seconde période
98 Mean_2013_2017_scaled<-{temp<-apply(rest2013_2017,1,function(x){mean(x)})-apply(rest2008_2012,1,function(x){mean(x)});c(mean(exp(temp)),sd(exp(temp)),quantile(exp(temp),prob=c(0.025,0.975)))}
99
100
101
102 ############### NATIONAL OUPUTS:
103 ############### SORTIES NATIONALES:
104
105 res2008_2012_scaled_df = data.frame(Mean_2008_2012_scaled)
106 res2008_2012_scaled_df=`rownames<-`(res2008_2012_scaled_df,c("mean","sd","2,5%","97,5%"))
107
108 res2013_2017_scaled_df = data.frame(Mean_2013_2017_scaled)
109 res2013_2017_scaled_df=`rownames<-`(res2013_2017_scaled_df,c("mean","sd","2,5%","97,5%"))
110
111
112 write.csv(res2008_2012_scaled_df, file = "res2008_2012_scaled.csv")
113 write.csv(res2013_2017_scaled_df,file= "res2013_2017_scaled.csv")
114
115 ############### REGIONAL OUPUTS:
116 ############### SORTIES REGIONALES (GRECO):
117
118 regres2008_2012_scaled<-apply(rest2008_2012_s3-rest2008_2012,2,function(x){temp<-x;c(mean=mean(exp(temp)),sd=sd(exp(temp)),quantile(exp(temp),prob=c(0.025,0.975)))})
119 regres2013_2017_scaled<-apply(rest2013_2017-rest2008_2012,2,function(x){temp<-x;c(mean=mean(exp(temp)),sd=sd(exp(temp)),quantile(exp(temp),prob=c(0.025,0.975)))})
120 dimnames(regres2008_2012_scaled)[[2]]<-as.character(data2008_2012[,2])
121 dimnames(regres2013_2017_scaled)[[2]]<-as.character(data2013_2017[,2])
122
123 write.csv(regres2008_2012_scaled, file = "regres2008_2012_scaled.csv")
124 write.csv(regres2013_2017_scaled, file = "regres2013_2017_scaled.csv")
125
126 ############### data to make a bar plot of the national evolution rate
127 histo_data = data.frame(
128 variable_name = c(names(res2008_2012_scaled_df),names(res2013_2017_scaled_df)),
129 variable = c(round(Mean_2008_2012_scaled[1]*100),round(Mean_2013_2017_scaled[1]*100)),
130 standard_deviation = c(Mean_2008_2012_scaled[2]*100,Mean_2013_2017_scaled[2]*100)
131 )
132
133 write.table(histo_data, file = "histo_data.tsv",row.names = F, col.names = T ,sep ="\t")
134
135 ############### data to make a map of the GRECO evolution rate
136
137 rate2008_2012 = data.frame(round(regres2008_2012_scaled[1,1:11]*100))
138 rate2013_2017 = data.frame(round(regres2013_2017_scaled[1,1:11]*100))
139
140 evol_rate = rate2013_2017-rate2008_2012
141 evol_rate = cbind(data2013_2017[,2],evol_rate)
142 colnames(evol_rate)<-c("Regions","Evolution_rate")
143
144
145 write.table(evol_rate,"evolution_rate.tsv",sep="\t",quote=F,row.names=F,col.names=T)
146
147
148
149
150
151
152
153
154
155
156