### view tools/mytools/cdf.r @ 1:cdcb0ce84a1b

author xuebing Fri, 09 Mar 2012 19:45:15 -0500 9071e359b9a3
line wrap: on
line source
```
# bin and average
binaverage = function(x,nbin,rankORvalue){
# use x[,1] to bin x[,2]
binned = list()
if (rankORvalue == 'value'){
mi = min(x[,1])
ma = max(x[,1])
bins = seq(mi,ma,length.out=nbin+1)
bins[1] = bins[1] - abs(mi)/100
bins[nbin+1] = bins[nbin+1] + abs(ma)/100
for (i in 1:nbin){
binned[[i]] = x[x[,1] >= bins[i] & x[,1] < bins[i+1],2]
}
bins[1] = bins[1] + abs(mi)/100
bins[nbin+1] = bins[nbin+1] - abs(ma)/100
} else {
x = x[order(x[,1]),]
step = round(nrow(x)/nbin)
bins = x[1,1]
for (i in 1:(nbin-1)){
binned[[i]] = x[((i-1)*step+1):(i*step),2]
bins = c(bins,x[i*step+1,1])
}
binned[[nbin]] = x[((nbin-1)*step+1):nrow(x),2]
bins[nbin+1] = x[nrow(x),1]
}
# bin lavel
labels = character(0)
for (i in 1:nbin){
labels = c(labels,paste(format(bins[i],digits=2,nsmall=2),format(bins[i+1],digits=2,nsmall=2),sep='~'))
}
list(binned=binned,bins=bins,labels=labels)
}
#res = binaverage(x,3,'rank')

# CDF plot and KS.test
mycdf = function(list,title,labels,legendposition,xlabel,legend_title){
L = length(list)

# barplot for mean and std
avg = numeric(L)
err = numeric(L)
for (i in 1:L){
avg[i] = mean(list[[i]])
err[i] = sd(list[[i]])
}
#print(list[[1]])
#print(list[[2]])
#print(avg)
#print(err)
par(cex=1.5,mar=c(8,6,6,4))
xticks <- barplot(avg,names.arg=labels,las=2,ylab=xlabel,main='mean and standard deviation',xlab=legend_title,ylim=c(0,max(avg+err)))
arrows(xticks,avg+err, xticks, avg-err, angle=90, code=3, length=0.0)

if (L>1){
# ks test
cat('\nKS test:\n')
cat('sample1\tsample2\tp-value\n')
cat('-------------------------------------------------\n')
for (i in 1:(L-1)){
for (j in (i+1):L){
cat(labels[i],'\t',labels[j],'\t')
ks = ks.test(list[[i]],list[[j]])
pv = max(2.2e-16,ks\$p.value)
pv = format(pv,digits=3,nsmall=2)
cat(pv,'\n')
}
}
cat('-------------------------------------------------\n')
}
if (L == 2){
title = paste(title,'\np=',pv,sep='')
}
# cdf plot
listx = list()
listy = list()
mi = 1e10
ma = 1e-10
for (i in 1:L){
mi = min(mi,list[[i]])
ma = max(ma,list[[i]])
}
for (i in 1:L){
listx[[i]] = c(mi,listx[i],ma)
listy[[i]] = c(0,listy[i],1)
}
for (i in 1:L){
mi = min(mi,list[[i]])
ma = max(ma,list[[i]])
listx[[i]] = sort(list[[i]])
listy[[i]] = c(1:length(list[[i]]))/length(list[[i]])
}
#par(xlog=(xlog=='xlog'))
plot(listx[[1]],listy[[1]],type='l',lty=1,lwd=2,col=2,main=title,xlab=xlabel,ylab='cumulative frequency')
for (i in 2:L){
lines(listx[[i]],listy[[i]],type='l',lty=i,lwd=2,col=i+1)
}
# legend
for (i in 1:L){
labels[i] = paste(labels[i],', n=',length(list[[i]]),sep='')
}
legend(legendposition,legend=labels,col=2:(L+1), lty=1:L,lwd=2, bty='n',title=legend_title)
}
```