Mercurial > repos > dereeper > mlmm
view source_library/mlmm1.r @ 1:380b364980f9 draft default tip
planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
author | dereeper |
---|---|
date | Mon, 16 Apr 2018 08:50:05 -0400 |
parents | |
children |
line wrap: on
line source
############################################################################################################################################## ###MLMM - Multi-Locus Mixed Model ###SET OF FUNCTIONS TO CARRY GWAS CORRECTING FOR POPULATION STRUCTURE WHILE INCLUDING COFACTORS THROUGH A STEPWISE-REGRESSION APPROACH ####### # ##note: require EMMA #library(emma) #source('emma.r') # ##REQUIRED DATA & FORMAT # #PHENOTYPE - Y: a vector of length m, with names(Y)=individual names #GENOTYPE - X: a n by m matrix, where n=number of individuals, m=n umber of SNPs, with rownames(X)=individual names, and colnames(X)=SNP names #KINSHIP - K: a n by n matrix, with rownames(K)=colnames(K)=individual names #each of these data being sorted in the same way, according to the individual name # ##FOR PLOTING THE GWAS RESULTS #SNP INFORMATION - snp_info: a data frame having at least 3 columns: # - 1 named 'SNP', with SNP names (same as colnames(X)), # - 1 named 'Chr', with the chromosome number to which belong each SNP # - 1 named 'Pos', with the position of the SNP onto the chromosome it belongs to. ####### # ##FUNCTIONS USE #save this file somewhere on your computer and source it! #source('path/mlmm.r') # ###FORWARD + BACKWARD ANALYSES #mygwas<-mlmm(Y,X,K,nbchunks,maxsteps) #X,Y,K as described above #nbchunks: an integer defining the number of chunks of to run the analysis, allows to decrease the memory usage ==> minimum=2, increase it if you do not have enough memory #maxsteps: maximum number of steps desired in the forward approach. The forward approach breaks automatically once the pseudo-heritability is close to 0, # however to avoid doing too many steps in case the pseudo-heritability does not reach a value close to 0, this parameter is also used. # It's value must be specified as an integer >= 3 # ###RESULTS # ##STEPWISE TABLE #mygwas$step_table # ##PLOTS # ##PLOTS FORM THE FORWARD TABLE #plot_step_table(mygwas,type=c('h2','maxpval','BIC','extBIC')) # ##RSS PLOT #plot_step_RSS(mygwas) # ##GWAS MANHATTAN PLOTS # #FORWARD STEPS #plot_fwd_GWAS(mygwas,step,snp_info,pval_filt) #step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot # #OPTIMAL MODELS #Automatic identification of the optimal models within the forwrad-backward models according to the extendedBIC or multiple-bonferonni criteria # #plot_opt_GWAS(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot # ##GWAS MANHATTAN PLOT ZOOMED IN A REGION OF INTEREST #plot_fwd_region(mygwas,step,snp_info,pval_filt,chrom,pos1,pos2) #step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot #chrom is an integer specifying the chromosome on which the region of interest is #pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info # #plot_opt_region(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt,chrom,pos1,pos2) #snp_info as described above #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot #chrom is an integer specifying the chromosome on which the region of interest is #pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info # ##QQPLOTS of pvalues #qqplot_fwd_GWAS(mygwas,nsteps) #nsteps=maximum number of forward steps to be displayed # #qqplot_opt_GWAS(mygwas,opt=c('extBIC','mbonf')) # ############################################################################################################################################## mlmm<-function(Y,X,K,nbchunks,maxsteps) { n<-length(Y) m<-ncol(X) stopifnot(ncol(K) == n) stopifnot(nrow(K) == n) stopifnot(nrow(X) == n) stopifnot(nbchunks >= 2) stopifnot(maxsteps >= 3) #INTERCEPT Xo<-rep(1,n) #K MATRIX NORMALISATION K_norm<-(n-1)/sum((diag(n)-matrix(1,n,n)/n)*K)*K rm(K) #step 0 : NULL MODEL cof_fwd<-list() cof_fwd[[1]]<-as.matrix(Xo) colnames(cof_fwd[[1]])<-'Xo' mod_fwd<-list() mod_fwd[[1]]<-emma.REMLE(Y,cof_fwd[[1]],K_norm) herit_fwd<-list() herit_fwd[[1]]<-mod_fwd[[1]]$vg/(mod_fwd[[1]]$vg+mod_fwd[[1]]$ve) RSSf<-list() RSSf[[1]]<-'NA' RSS_H0<-list() RSS_H0[[1]]<-'NA' df1<-1 df2<-list() df2[[1]]<-'NA' Ftest<-list() Ftest[[1]]<-'NA' pval<-list() pval[[1]]<-'NA' fwd_lm<-list() cat('null model done! pseudo-h=',round(herit_fwd[[1]],3),'\n') #step 1 : EMMAX M<-solve(chol(mod_fwd[[1]]$vg*K_norm+mod_fwd[[1]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_fwd_t<-crossprod(M,cof_fwd[[1]]) fwd_lm[[1]]<-summary(lm(Y_t~0+cof_fwd_t)) Res_H0<-fwd_lm[[1]]$residuals Q_<-qr.Q(qr(cof_fwd_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[1]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[1]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[1]])-1))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf[[2]]<-unlist(RSS) RSS_H0[[2]]<-sum(Res_H0^2) df2[[2]]<-n-df1-ncol(cof_fwd[[1]]) Ftest[[2]]<-(rep(RSS_H0[[2]],length(RSSf[[2]]))/RSSf[[2]]-1)*df2[[2]]/df1 pval[[2]]<-pf(Ftest[[2]],df1,df2[[2]],lower.tail=FALSE) cof_fwd[[2]]<-cbind(cof_fwd[[1]],X[,colnames(X) %in% names(which(RSSf[[2]]==min(RSSf[[2]]))[1])]) colnames(cof_fwd[[2]])<-c(colnames(cof_fwd[[1]]),names(which(RSSf[[2]]==min(RSSf[[2]]))[1])) mod_fwd[[2]]<-emma.REMLE(Y,cof_fwd[[2]],K_norm) herit_fwd[[2]]<-mod_fwd[[2]]$vg/(mod_fwd[[2]]$vg+mod_fwd[[2]]$ve) rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS) cat('step 1 done! pseudo-h=',round(herit_fwd[[2]],3),'\n') #FORWARD for (i in 3:(maxsteps)) { if (herit_fwd[[i-2]] < 0.01) break else { M<-solve(chol(mod_fwd[[i-1]]$vg*K_norm+mod_fwd[[i-1]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_fwd_t<-crossprod(M,cof_fwd[[i-1]]) fwd_lm[[i-1]]<-summary(lm(Y_t~0+cof_fwd_t)) Res_H0<-fwd_lm[[i-1]]$residuals Q_ <- qr.Q(qr(cof_fwd_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[i-1]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[i-1]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[i-1]])-1))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf[[i]]<-unlist(RSS) RSS_H0[[i]]<-sum(Res_H0^2) df2[[i]]<-n-df1-ncol(cof_fwd[[i-1]]) Ftest[[i]]<-(rep(RSS_H0[[i]],length(RSSf[[i]]))/RSSf[[i]]-1)*df2[[i]]/df1 pval[[i]]<-pf(Ftest[[i]],df1,df2[[i]],lower.tail=FALSE) cof_fwd[[i]]<-cbind(cof_fwd[[i-1]],X[,colnames(X) %in% names(which(RSSf[[i]]==min(RSSf[[i]]))[1])]) colnames(cof_fwd[[i]])<-c(colnames(cof_fwd[[i-1]]),names(which(RSSf[[i]]==min(RSSf[[i]]))[1])) mod_fwd[[i]]<-emma.REMLE(Y,cof_fwd[[i]],K_norm) herit_fwd[[i]]<-mod_fwd[[i]]$vg/(mod_fwd[[i]]$vg+mod_fwd[[i]]$ve) rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS)} cat('step ',i-1,' done! pseudo-h=',round(herit_fwd[[i]],3),'\n')} rm(i) ##gls at last forward step M<-solve(chol(mod_fwd[[length(mod_fwd)]]$vg*K_norm+mod_fwd[[length(mod_fwd)]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_fwd_t<-crossprod(M,cof_fwd[[length(mod_fwd)]]) fwd_lm[[length(mod_fwd)]]<-summary(lm(Y_t~0+cof_fwd_t)) Res_H0<-fwd_lm[[length(mod_fwd)]]$residuals Q_ <- qr.Q(qr(cof_fwd_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[length(mod_fwd)]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[length(mod_fwd)]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[length(mod_fwd)]])-1))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf[[length(mod_fwd)+1]]<-unlist(RSS) RSS_H0[[length(mod_fwd)+1]]<-sum(Res_H0^2) df2[[length(mod_fwd)+1]]<-n-df1-ncol(cof_fwd[[length(mod_fwd)]]) Ftest[[length(mod_fwd)+1]]<-(rep(RSS_H0[[length(mod_fwd)+1]],length(RSSf[[length(mod_fwd)+1]]))/RSSf[[length(mod_fwd)+1]]-1)*df2[[length(mod_fwd)+1]]/df1 pval[[length(mod_fwd)+1]]<-pf(Ftest[[length(mod_fwd)+1]],df1,df2[[length(mod_fwd)+1]],lower.tail=FALSE) rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS) ##get max pval at each forward step max_pval_fwd<-vector(mode="numeric",length=length(fwd_lm)) max_pval_fwd[1]<-0 for (i in 2:length(fwd_lm)) {max_pval_fwd[i]<-max(fwd_lm[[i]]$coef[2:i,4])} rm(i) ##get the number of parameters & Loglikelihood from ML at each step mod_fwd_LL<-list() mod_fwd_LL[[1]]<-list(nfixed=ncol(cof_fwd[[1]]),LL=emma.MLE(Y,cof_fwd[[1]],K_norm)$ML) for (i in 2:length(cof_fwd)) {mod_fwd_LL[[i]]<-list(nfixed=ncol(cof_fwd[[i]]),LL=emma.MLE(Y,cof_fwd[[i]],K_norm)$ML)} rm(i) cat('backward analysis','\n') ##BACKWARD (1st step == last fwd step) dropcof_bwd<-list() cof_bwd<-list() mod_bwd <- list() bwd_lm<-list() herit_bwd<-list() dropcof_bwd[[1]]<-'NA' cof_bwd[[1]]<-as.matrix(cof_fwd[[length(mod_fwd)]][,!colnames(cof_fwd[[length(mod_fwd)]]) %in% dropcof_bwd[[1]]]) colnames(cof_bwd[[1]])<-colnames(cof_fwd[[length(mod_fwd)]])[!colnames(cof_fwd[[length(mod_fwd)]]) %in% dropcof_bwd[[1]]] mod_bwd[[1]]<-emma.REMLE(Y,cof_bwd[[1]],K_norm) herit_bwd[[1]]<-mod_bwd[[1]]$vg/(mod_bwd[[1]]$vg+mod_bwd[[1]]$ve) M<-solve(chol(mod_bwd[[1]]$vg*K_norm+mod_bwd[[1]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_bwd_t<-crossprod(M,cof_bwd[[1]]) bwd_lm[[1]]<-summary(lm(Y_t~0+cof_bwd_t)) rm(M,Y_t,cof_bwd_t) for (i in 2:length(mod_fwd)) { dropcof_bwd[[i]]<-(colnames(cof_bwd[[i-1]])[2:ncol(cof_bwd[[i-1]])])[which(abs(bwd_lm[[i-1]]$coef[2:nrow(bwd_lm[[i-1]]$coef),3])==min(abs(bwd_lm[[i-1]]$coef[2:nrow(bwd_lm[[i-1]]$coef),3])))] cof_bwd[[i]]<-as.matrix(cof_bwd[[i-1]][,!colnames(cof_bwd[[i-1]]) %in% dropcof_bwd[[i]]]) colnames(cof_bwd[[i]])<-colnames(cof_bwd[[i-1]])[!colnames(cof_bwd[[i-1]]) %in% dropcof_bwd[[i]]] mod_bwd[[i]]<-emma.REMLE(Y,cof_bwd[[i]],K_norm) herit_bwd[[i]]<-mod_bwd[[i]]$vg/(mod_bwd[[i]]$vg+mod_bwd[[i]]$ve) M<-solve(chol(mod_bwd[[i]]$vg*K_norm+mod_bwd[[i]]$ve*diag(n))) Y_t<-crossprod(M,Y) cof_bwd_t<-crossprod(M,cof_bwd[[i]]) bwd_lm[[i]]<-summary(lm(Y_t~0+cof_bwd_t)) rm(M,Y_t,cof_bwd_t)} rm(i) ##get max pval at each backward step max_pval_bwd<-vector(mode="numeric",length=length(bwd_lm)) for (i in 1:(length(bwd_lm)-1)) {max_pval_bwd[i]<-max(bwd_lm[[i]]$coef[2:(length(bwd_lm)+1-i),4])} max_pval_bwd[length(bwd_lm)]<-0 ##get the number of parameters & Loglikelihood from ML at each step mod_bwd_LL<-list() mod_bwd_LL[[1]]<-list(nfixed=ncol(cof_bwd[[1]]),LL=emma.MLE(Y,cof_bwd[[1]],K_norm)$ML) for (i in 2:length(cof_bwd)) {mod_bwd_LL[[i]]<-list(nfixed=ncol(cof_bwd[[i]]),LL=emma.MLE(Y,cof_bwd[[i]],K_norm)$ML)} rm(i) cat('creating output','\n') ##Forward Table: Fwd + Bwd Tables #Compute parameters for model criteria BIC<-function(x){-2*x$LL+(x$nfixed+1)*log(n)} extBIC<-function(x){BIC(x)+2*lchoose(m,x$nfixed-1)} fwd_table<-data.frame(step=ncol(cof_fwd[[1]])-1,step_=paste('fwd',ncol(cof_fwd[[1]])-1,sep=''),cof='NA',ncof=ncol(cof_fwd[[1]])-1,h2=herit_fwd[[1]] ,maxpval=max_pval_fwd[1],BIC=BIC(mod_fwd_LL[[1]]),extBIC=extBIC(mod_fwd_LL[[1]])) for (i in 2:(length(mod_fwd))) {fwd_table<-rbind(fwd_table, data.frame(step=ncol(cof_fwd[[i]])-1,step_=paste('fwd',ncol(cof_fwd[[i]])-1,sep=''),cof=paste('+',colnames(cof_fwd[[i]])[i],sep=''),ncof=ncol(cof_fwd[[i]])-1,h2=herit_fwd[[i]] ,maxpval=max_pval_fwd[i],BIC=BIC(mod_fwd_LL[[i]]),extBIC=extBIC(mod_fwd_LL[[i]])))} rm(i) bwd_table<-data.frame(step=length(mod_fwd),step_=paste('bwd',0,sep=''),cof=paste('-',dropcof_bwd[[1]],sep=''),ncof=ncol(cof_bwd[[1]])-1,h2=herit_bwd[[1]] ,maxpval=max_pval_bwd[1],BIC=BIC(mod_bwd_LL[[1]]),extBIC=extBIC(mod_bwd_LL[[1]])) for (i in 2:(length(mod_bwd))) {bwd_table<-rbind(bwd_table, data.frame(step=length(mod_fwd)+i-1,step_=paste('bwd',i-1,sep=''),cof=paste('-',dropcof_bwd[[i]],sep=''),ncof=ncol(cof_bwd[[i]])-1,h2=herit_bwd[[i]] ,maxpval=max_pval_bwd[i],BIC=BIC(mod_bwd_LL[[i]]),extBIC=extBIC(mod_bwd_LL[[i]])))} rm(i,BIC,extBIC,max_pval_fwd,max_pval_bwd,dropcof_bwd) fwdbwd_table<-rbind(fwd_table,bwd_table) #RSS for plot mod_fwd_RSS<-vector() mod_fwd_RSS[1]<-sum((Y-cof_fwd[[1]]%*%fwd_lm[[1]]$coef[,1])^2) for (i in 2:length(mod_fwd)) {mod_fwd_RSS[i]<-sum((Y-cof_fwd[[i]]%*%fwd_lm[[i]]$coef[,1])^2)} mod_bwd_RSS<-vector() mod_bwd_RSS[1]<-sum((Y-cof_bwd[[1]]%*%bwd_lm[[1]]$coef[,1])^2) for (i in 2:length(mod_bwd)) {mod_bwd_RSS[i]<-sum((Y-cof_bwd[[i]]%*%bwd_lm[[i]]$coef[,1])^2)} expl_RSS<-c(1-sapply(mod_fwd_RSS,function(x){x/mod_fwd_RSS[1]}),1-sapply(mod_bwd_RSS,function(x){x/mod_bwd_RSS[length(mod_bwd_RSS)]})) h2_RSS<-c(unlist(herit_fwd),unlist(herit_bwd))*(1-expl_RSS) unexpl_RSS<-1-expl_RSS-h2_RSS plot_RSS<-t(apply(cbind(expl_RSS,h2_RSS,unexpl_RSS),1,cumsum)) #GLS pvals at each step pval_step<-list() pval_step[[1]]<-list(out=data.frame('SNP'=colnames(X),'pval'=pval[[2]]),cof='') for (i in 2:(length(mod_fwd))) {pval_step[[i]]<-list(out=rbind(data.frame(SNP=colnames(cof_fwd[[i]])[-1],'pval'=fwd_lm[[i]]$coef[2:i,4]), data.frame(SNP=colnames(X)[-which(colnames(X) %in% colnames(cof_fwd[[i]]))],'pval'=pval[[i+1]])),cof=colnames(cof_fwd[[i]])[-1])} #GLS pvals for best models according to extBIC and mbonf opt_extBIC<-fwdbwd_table[which(fwdbwd_table$extBIC==min(fwdbwd_table$extBIC))[1],] opt_mbonf<-(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),])[which(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),]$ncof==max(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),]$ncof))[1],] bestmodel_pvals<-function(model) {if(substr(model$step_,start=0,stop=3)=='fwd') { pval_step[[as.integer(substring(model$step_,first=4))+1]]} else if (substr(model$step_,start=0,stop=3)=='bwd') { cof<-cof_bwd[[as.integer(substring(model$step_,first=4))+1]] mixedmod<-emma.REMLE(Y,cof,K_norm) M<-solve(chol(mixedmod$vg*K_norm+mixedmod$ve*diag(n))) Y_t<-crossprod(M,Y) cof_t<-crossprod(M,cof) GLS_lm<-summary(lm(Y_t~0+cof_t)) Res_H0<-GLS_lm$residuals Q_ <- qr.Q(qr(cof_t)) RSS<-list() for (j in 1:(nbchunks-1)) { X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof)])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t)} X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof)])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof)-1))]) RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) rm(X_t,j) RSSf<-unlist(RSS) RSS_H0<-sum(Res_H0^2) df2<-n-df1-ncol(cof) Ftest<-(rep(RSS_H0,length(RSSf))/RSSf-1)*df2/df1 pval<-pf(Ftest,df1,df2,lower.tail=FALSE) list(out=rbind(data.frame(SNP=colnames(cof)[-1],'pval'=GLS_lm$coef[2:(ncol(cof)),4]), data.frame('SNP'=colnames(X)[-which(colnames(X) %in% colnames(cof))],'pval'=pval)),cof=colnames(cof)[-1])} else {cat('error \n')}} opt_extBIC_out<-bestmodel_pvals(opt_extBIC) opt_mbonf_out<-bestmodel_pvals(opt_mbonf) list(step_table=fwdbwd_table,pval_step=pval_step,RSSout=plot_RSS,bonf_thresh=-log10(0.05/m),opt_extBIC=opt_extBIC_out,opt_mbonf=opt_mbonf_out)} plot_step_table<-function(x,type){ if (type=='h2') {plot(x$step_table$step,x$step_table$h2,type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='h2') abline(v=(nrow(x$step_table)/2-0.5),lty=2)} else if (type=='maxpval'){plot(x$step_table$step,-log10(x$step_table$maxpval),type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='-log10(max_Pval)') abline(h=x$bonf_thresh,lty=2) abline(v=(nrow(x$step_table)/2-0.5),lty=2)} else if (type=='BIC'){plot(x$step_table$step,x$step_table$BIC,type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='BIC') abline(v=(nrow(x$step_table)/2-0.5),lty=2)} else if (type=='extBIC'){plot(x$step_table$step,x$step_table$extBIC,type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='EBIC') abline(v=(nrow(x$step_table)/2-0.5),lty=2)} else {cat('error! \n argument type must be one of h2, maxpval, BIC, extBIC')}} plot_step_RSS<-function(x){ op<-par(mar=c(5, 5, 2, 2)) plot(0,0,xlim=c(0,nrow(x$RSSout)-1),ylim=c(0,1),xlab='step',ylab='%var',col=0) polygon(c(0:(nrow(x$RSSout)-1),(nrow(x$RSSout)-1),0), c(x$RSSout[,3],0,0), col='brown1', border=0) polygon(c(0:(nrow(x$RSSout)-1),(nrow(x$RSSout)-1),0), c(x$RSSout[,2],0,0), col='forestgreen', border=0) polygon(c(0:(nrow(x$RSSout)-1),(nrow(x$RSSout)-1),0), c(x$RSSout[,1],0,0), col='dodgerblue4', border=0) abline(v=(nrow(x$step_table)/2-0.5),lty=2) par(op)} plot_GWAS<-function(x) { output_<-x$out[order(x$out$Pos),] output_ok<-output_[order(output_$Chr),] maxpos<-c(0,cumsum(as.numeric(aggregate(output_ok$Pos,list(output_ok$Chr),max)$x+max(cumsum(as.numeric(aggregate(output_ok$Pos,list(output_ok$Chr),max)$x)))/200))) plot_col<-rep(c('gray10','gray60'),ceiling(max(unique(output_ok$Chr))/2)) # plot_col<-c('blue','darkgreen','red','cyan','purple') size<-aggregate(output_ok$Pos,list(output_ok$Chr),length)$x a<-rep(maxpos[1],size[1]) b<-rep(plot_col[1],size[1]) for (i in 2:max(unique(output_ok$Chr))){ a<-c(a,rep(maxpos[i],size[i])) b<-c(b,rep(plot_col[i],size[i]))} output_ok$xpos<-output_ok$Pos+a output_ok$col<-b output_ok$col[output_ok$SNP %in% x$cof]<-'red' d<-(aggregate(output_ok$xpos,list(output_ok$Chr),min)$x+aggregate(output_ok$xpos,list(output_ok$Chr),max)$x)/2 plot(output_ok$xpos,-log10(output_ok$pval),col=output_ok$col,pch=20,ylab='-log10(pval)',xaxt='n',xlab='chromosome') axis(1,tick=FALSE,at=d,labels=c(1:max(unique(output_ok$Chr)))) abline(h=x$bonf_thresh,lty=3,col='black') if (length(output_ok$pval[-log10(output_ok$pval) > x$bonf_thresh]) > 0) { text(output_ok$xpos[-log10(output_ok$pval) > x$bonf_thresh], -log10(output_ok$pval[-log10(output_ok$pval) > x$bonf_thresh]), output_ok$SNP[-log10(output_ok$pval) > x$bonf_thresh], pos=3, cex=0.7) legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" ")) } else { legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" ")) } } plot_region<-function(x,chrom,pos1,pos2){ region<-subset(x$out,Chr==chrom & Pos>=pos1 & Pos <=pos2) region$col<- if (chrom %% 2 == 0) {'gray60'} else {'gray10'} region$col[which(region$SNP %in% x$cof)]<-'red' plot(region$Pos,-log10(region$pval),type='p',pch=20,main=paste('chromosome',chrom,sep=''),xlab='position (bp)',ylab='-log10(pval)',col=region$col,xlim=c(pos1,pos2)) abline(h=x$bonf_thresh,lty=3,col='black')} plot_fwd_GWAS<-function(x,step,snp_info,pval_filt) { stopifnot(step<=length(x$pval_step)) output<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by='SNP'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh) plot_GWAS(output)} plot_fwd_region<-function(x,step,snp_info,pval_filt,chrom,pos1,pos2) { stopifnot(step<=length(x$pval_step)) output<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by='SNP'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh) plot_region(output,chrom,pos1,pos2)} plot_opt_GWAS<-function(x,opt,snp_info,pval_filt) { if (opt=='extBIC') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by='SNP'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh) plot_GWAS(output)} else if (opt=='mbonf') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by='SNP'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh) plot_GWAS(output)} else {cat('error! \n opt must be extBIC or mbonf')}} plot_opt_region<-function(x,opt,snp_info,pval_filt,chrom,pos1,pos2) { if (opt=='extBIC') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by='SNP'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh) plot_region(output,chrom,pos1,pos2)} else if (opt=='mbonf') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by='SNP'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh) plot_region(output,chrom,pos1,pos2)} else {cat('error! \n opt must be extBIC or mbonf')}} qqplot_fwd_GWAS<-function(x,nsteps){ stopifnot(nsteps<=length(x$pval_step)) e<--log10(ppoints(nrow(x$pval_step[[1]]$out))) ostep<-list() ostep[[1]]<--log10(sort(x$pval_step[[1]]$out$pval)) for (i in 2:nsteps) {ostep[[i]]<--log10(sort(x$pval_step[[i]]$out$pval))} maxp<-ceiling(max(unlist(ostep))) plot(e,ostep[[1]],type='b',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp)) abline(0,1,col="dark grey") for (i in 2:nsteps) { par(new=T) plot(e,ostep[[i]],type='b',pch=20,cex=0.8,col=i,axes='F',xlab='',ylab='',xlim=c(0,max(e)+1),ylim=c(0,maxp))} legend(0,maxp,lty=1,pch=20,col=c(1:length(ostep)),paste(c(0:(length(ostep)-1)),'cof',sep=' ')) } qqplot_opt_GWAS<-function(x,opt){ if (opt=='extBIC') { e<--log10(ppoints(nrow(x$opt_extBIC$out))) o<--log10(sort(x$opt_extBIC$out$pval)) maxp<-ceiling(max(o)) plot(e,o,type='b',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste('optimal model according to extBIC')) abline(0,1,col="dark grey")} else if (opt=='mbonf') { e<--log10(ppoints(nrow(x$opt_mbonf$out))) o<--log10(sort(x$opt_mbonf$out$pval)) maxp<-ceiling(max(o)) plot(e,o,type='b',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste('optimal model according to mbonf')) abline(0,1,col="dark grey")} else {cat('error! \n opt must be extBIC or mbonf')}}