# HG changeset patch # User xuebing # Date 1333249584 14400 # Node ID 900fd0a7428de135f5147d692f510f05956b8fa3 # Parent a1d49f6b3180bf392f3fe8302ab362b3c3c2509d Uploaded diff -r a1d49f6b3180 -r 900fd0a7428d cdf.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cdf.r Sat Mar 31 23:06:24 2012 -0400 @@ -0,0 +1,103 @@ +# bin and average +binaverage = function(x,nbin,rankORvalue){ +# use x[,1] to bin x[,2] + binned = list() + if (rankORvalue == 'value'){ + mi = min(x[,1]) + ma = max(x[,1]) + bins = seq(mi,ma,length.out=nbin+1) + bins[1] = bins[1] - abs(mi)/100 + bins[nbin+1] = bins[nbin+1] + abs(ma)/100 + for (i in 1:nbin){ + binned[[i]] = x[x[,1] >= bins[i] & x[,1] < bins[i+1],2] + } + bins[1] = bins[1] + abs(mi)/100 + bins[nbin+1] = bins[nbin+1] - abs(ma)/100 + } else { + x = x[order(x[,1]),] + step = round(nrow(x)/nbin) + bins = x[1,1] + for (i in 1:(nbin-1)){ + binned[[i]] = x[((i-1)*step+1):(i*step),2] + bins = c(bins,x[i*step+1,1]) + } + binned[[nbin]] = x[((nbin-1)*step+1):nrow(x),2] + bins[nbin+1] = x[nrow(x),1] + } +# bin lavel + labels = character(0) + for (i in 1:nbin){ + labels = c(labels,paste(format(bins[i],digits=2,nsmall=2),format(bins[i+1],digits=2,nsmall=2),sep='~')) + } + list(binned=binned,bins=bins,labels=labels) +} +#res = binaverage(x,3,'rank') + +# CDF plot and KS.test +mycdf = function(list,title,labels,legendposition,xlabel,legend_title){ + L = length(list) + + # barplot for mean and std + avg = numeric(L) + err = numeric(L) + for (i in 1:L){ + avg[i] = mean(list[[i]]) + err[i] = sd(list[[i]]) + } + #print(list[[1]]) + #print(list[[2]]) + #print(avg) + #print(err) + par(cex=1.5,mar=c(8,6,6,4)) + xticks <- barplot(avg,names.arg=labels,las=2,ylab=xlabel,main='mean and standard deviation',xlab=legend_title,ylim=c(0,max(avg+err))) + arrows(xticks,avg+err, xticks, avg-err, angle=90, code=3, length=0.0) + + if (L>1){ + # ks test + cat('\nKS test:\n') + cat('sample1\tsample2\tp-value\n') + cat('-------------------------------------------------\n') + for (i in 1:(L-1)){ + for (j in (i+1):L){ + cat(labels[i],'\t',labels[j],'\t') + ks = ks.test(list[[i]],list[[j]]) + pv = max(2.2e-16,ks$p.value) + pv = format(pv,digits=3,nsmall=2) + cat(pv,'\n') + } + } + cat('-------------------------------------------------\n') + } + if (L == 2){ + title = paste(title,'\np=',pv,sep='') + } + # cdf plot + listx = list() + listy = list() + mi = 1e10 + ma = 1e-10 + for (i in 1:L){ + mi = min(mi,list[[i]]) + ma = max(ma,list[[i]]) + } + for (i in 1:L){ + listx[[i]] = c(mi,listx[i],ma) + listy[[i]] = c(0,listy[i],1) + } + for (i in 1:L){ + mi = min(mi,list[[i]]) + ma = max(ma,list[[i]]) + listx[[i]] = sort(list[[i]]) + listy[[i]] = c(1:length(list[[i]]))/length(list[[i]]) + } +#par(xlog=(xlog=='xlog')) + plot(listx[[1]],listy[[1]],type='l',lty=1,lwd=2,col=2,main=title,xlab=xlabel,ylab='cumulative frequency') + for (i in 2:L){ + lines(listx[[i]],listy[[i]],type='l',lty=i,lwd=2,col=i+1) + } + # legend + for (i in 1:L){ + labels[i] = paste(labels[i],', n=',length(list[[i]]),sep='') + } + legend(legendposition,legend=labels,col=2:(L+1), lty=1:L,lwd=2, bty='n',title=legend_title) +}