Next changeset 1:380b364980f9 (2018-04-16) |
Commit message:
Uploaded |
added:
mlmm/MLMM.pl mlmm/MLMM.sh mlmm/MLMM.xml mlmm/source_library/emma.mlmm.r mlmm/source_library/emma.r mlmm/source_library/mlmm.r mlmm/source_library/mlmm1.r mlmm/source_library/plot_MLMM.Ago.r |
b |
diff -r 000000000000 -r 6b7107812931 mlmm/MLMM.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mlmm/MLMM.pl Thu Jul 02 05:42:38 2015 -0400 |
[ |
@@ -0,0 +1,155 @@ +#!/usr/bin/perl + +use strict; +use Switch; +use Getopt::Long; +use Bio::SeqIO; + + +my $usage = qq~Usage:$0 <args> [<opts>] +where <args> are: + -g, --geno <Genotype input> + -i, --info <SNP information. Genome position.> + -p, --pheno <Phenotype input> + -o, --out <output name> + -d, --directory <directory for MLMM R libraries> + -s, --step_number <number of steps. Maximum: 20. Default: 10> + -m, --method <Method: mbonf or extBIC. Default: mbonf> +~; +$usage .= "\n"; + +my ($geno,$map,$pheno,$out,$dir,$steps,$method); + + +GetOptions( + "geno=s" => \$geno, + "info=s" => \$map, + "pheno=s" => \$pheno, + "out=s" => \$out, + "dir=s" => \$dir, + "steps=s" => \$steps, + "method=s" => \$method +); + + +die $usage + if ( !$geno || !$map || !$pheno || !$out || !$dir || !$steps || !$method); + +my $max_steps = 10; +my $plot_opt = "mbonf"; +if ($method && $method ne 'mbonf' && $method ne 'extBIC') +{ + print "Aborted: Method must be mbonf or extBIC.\n"; + exit; +} +else +{ + $plot_opt = $method; +} +if ($steps && $steps !~/\d+/ && $steps > 20 && $steps < 2) +{ + print "Aborted: Number of steps must be greater than 2 and lower than 20.\n"; + exit; +} +else +{ + $max_steps = $steps; +} + + +my $chunk = 2; + +my $RSCRIPT_EXE = "Rscript"; +my $R_DIR = $dir; + + +my $head_trait = `head -1 $pheno`; +my @headers_traits = split(/\t/,$head_trait); +my $trait_name = $headers_traits[1]; + + +open( my $RCMD, ">rscript" ) or throw Error::Simple($!); + + + + +print $RCMD "Y_file <- \"" . $pheno . "\"\n"; +print $RCMD "X_file <- \"" . $geno . "\"\n"; +if($map) +{ + print $RCMD "map_file <- \"$map\"\n"; + print $RCMD "map <- read.table(map_file, sep = \"\\t\", header = T)\n"; +} +print $RCMD "mlmm_data = list()\n"; +print $RCMD "mlmm_data\$chunk <- $chunk\n"; +print $RCMD "mlmm_data\$maxsteps <- $max_steps\n"; +print $RCMD "genot <- read.table(X_file, sep = \"\\t\", header = T)\n"; +print $RCMD "genot_mat <- as.matrix(genot[, 2:ncol(genot)])\n"; +print $RCMD "rownames(genot_mat) <- genot\$Ind_id\n"; + +print $RCMD "phenot <- read.table(Y_file, sep = \"\\t\", header = T)\n"; + + + +# missing data imputation +print $RCMD "genot_imp <- genot_mat\n"; +print $RCMD "average <- colMeans(genot_imp, na.rm = T)\n"; +print $RCMD "for (i in 1:ncol(genot_imp)){genot_imp[is.na(genot_imp[,i]), i] <- average[i]}\n"; + +# kinship matrix computation +print $RCMD "average <- colMeans(genot_imp, na.rm = T)\n"; +print $RCMD "stdev <- apply(genot_imp, 2, sd)\n"; +print $RCMD "genot_stand <- sweep(sweep(genot_imp, 2, average, \"-\"), 2, stdev, \"/\")\n"; +print $RCMD "K_mat <- (genot_stand %*% t(genot_stand)) / ncol(genot_stand)\n"; +print $RCMD "write.table(K_mat, '$out.kinship', sep='\\t', dec='.', quote=F, col.names=T, row.names=T)\n"; + +print $RCMD "source(\"" . $R_DIR. "/mlmm.r\")\n"; +print $RCMD "source(\"" . $R_DIR. "/emma.r\")\n"; + +# mlmm +print $RCMD "mygwas <- mlmm(Y = phenot\$$trait_name, X = genot_imp, K = K_mat, nbchunks=mlmm_data\$chunk, maxsteps=mlmm_data\$maxsteps)\n"; + +# plots +print $RCMD "pdf('$out.pdf')\n"; +print $RCMD "plot_step_table(mygwas, \"h2\")\n"; +print $RCMD "plot_step_table(mygwas, \"extBIC\")\n"; +print $RCMD "plot_step_table(mygwas, \"maxpval\")\n"; +print $RCMD "plot_step_RSS(mygwas)\n"; +# for (my $j = 1; $j <= ($max_steps - 1); $j++) +# { + # print $RCMD "plot_fwd_GWAS(mygwas, step = $j, snp_info = map, pval_filt = 0.1)\n"; +# } +print $RCMD "plot_opt_GWAS(mygwas, opt = \"extBIC\", snp_info = map, pval_filt = 0.1)\n"; +print $RCMD "plot_opt_GWAS(mygwas, opt = \"mbonf\", snp_info = map, pval_filt = 0.1)\n"; +#print $RCMD "qqplot_fwd_GWAS(mygwas, nsteps = mlmm_data\$maxsteps)\n"; +print $RCMD "qqplot_opt_GWAS(mygwas, opt = \"extBIC\")\n"; +print $RCMD "qqplot_opt_GWAS(mygwas, opt = \"mbonf\")\n"; + +# outputs +print $RCMD "write.table(mygwas\$RSSout, '$out.rss', sep='\\t', dec='.', quote=F, col.names=T, row.names=F)\n"; +print $RCMD "write.table(mygwas\$step_table, '$out.steptable', sep='\\t', dec='.', quote=F, col.names=T, row.names=F)\n"; + +$plot_opt = "\$opt_" . $plot_opt; +print $RCMD "pval = mygwas" . $plot_opt . "\$out\n"; +print $RCMD "colnames(pval) = c(\"Marker_name\", \"Pvalue\")\n"; +print $RCMD "info_tmp = map\n"; +print $RCMD "colnames(info_tmp) = c(\"Marker_name\", \"Chr\", \"Pos\")\n"; +print $RCMD "res_asso = pval\n"; +print $RCMD qq~ +if(exists("info_tmp")){ + res_asso = merge(info_tmp, res_asso, by="Marker_name") + if( !is.element("Trait", colnames(info_tmp)) ){ + m = matrix(data="traitname", ncol=1, nrow=nrow(res_asso), dimnames=list(c(), c("Trait"))) + res_asso = cbind(m, res_asso) + } +} +~; +print $RCMD "res_asso = res_asso[order(res_asso[, \"Trait\"], res_asso[, \"Chr\"], res_asso[, \"Pos\"]), ]\n"; +print $RCMD "write.table(res_asso, '$out.res_asso', sep='\t', dec='.', quote=F, col.names=T, row.names=F)\n"; +close($RCMD); + +system("$RSCRIPT_EXE --vanilla rscript"); + + + + |
b |
diff -r 000000000000 -r 6b7107812931 mlmm/MLMM.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mlmm/MLMM.sh Thu Jul 02 05:42:38 2015 -0400 |
b |
@@ -0,0 +1,28 @@ +#!/bin/bash +geno=$1 +map=$2 +pheno=$3 +steps=$4 +method=$5 +output=$6 +pdf=$7 +kinship=$8 +rss=$9 +step_table=${10} +log=${11} + +directory=`dirname $0` +mkdir tmpdir$$ +cp -rf $geno tmpdir$$/geno +cp -rf $map tmpdir$$/map +cp -rf $pheno tmpdir$$/pheno + +perl $directory/MLMM.pl -g tmpdir$$/geno -i tmpdir$$/map -p tmpdir$$/pheno -s $steps -m $method -o tmpdir$$/output -d $directory/source_library >>$log 2>&1 + +mv tmpdir$$/output.pdf $pdf +mv tmpdir$$/output.kinship $kinship +mv tmpdir$$/output.res_asso $output +mv tmpdir$$/output.rss $rss +mv tmpdir$$/output.steptable $step_table + + |
b |
diff -r 000000000000 -r 6b7107812931 mlmm/MLMM.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mlmm/MLMM.xml Thu Jul 02 05:42:38 2015 -0400 |
b |
@@ -0,0 +1,61 @@ +<tool id="mlmm" name="MLMM" version="1.0"> + <description>GWAS using Multi-Locus Mixed-Model (MLMM)</description> + <requirements> + <requirement type="binary">Rscript</requirement> + </requirements> + <command interpreter="bash">./MLMM.sh $geno $map $pheno $steps $method $output $pdf $kinship $rss $step_table $log + </command> + <inputs> + <param format="txt" name="geno" type="data" label="Genotype matrix" help="NxM, N = individuals in line, M = Markers in columns, Genotype coded in 0,1,2"/> + <param type="data" format="txt" name="map" label="SNP Information file" help="3 columns: SNP, Chrom, Pos"/> + <param format="txt" name="pheno" type="data" label="Phenotype matrix" help="NxT, N = individuals in line, T = Trait in columns (Phenot1, Phenot2...)"/> + <param type="text" name="steps" label="Maximum number of steps for the forward approach" value="10"/> + <param name="method" type="select"> + <option value="extBIC">EBIC</option> + <option value="mbonf" selected="True">MBonf</option> + </param> + </inputs> + <outputs> + <data format="txt" name="output" label="Association results"/> + <data format="txt" name="kinship" label="Kinship matrix"/> + <data format="pdf" name="pdf" label="PDF Graphical outputs"/> + <data format="txt" name="rss" label="RSS"/> + <data format="txt" name="step_table" label="Step Table"/> + <data format="txt" name="log" label="Log file"/> + </outputs> + <help> + + +.. class:: infomark + +**Program encapsulated in Galaxy by Southgreen** + +.. class:: infomark + +**MLMM version 1.0** + +----- + +============== + Please cite: +============== + +"An efficient multi-locus mixed-model approach for genome-wide association studies in structured populations.", **Segura V, Vilhjalmsson BJ, Platt A, Korte A, Seren U, Long Q, Nordborg M.**, Nature Genetics, 44: 825-830, 2012. + +----- + +=========== + Overview: +=========== + +MLMM is an efficient multi-locus mixed-model approach for genome-wide association studies in structured populations. + +----- + +For further informations, please visite the MLMM_ website. + + +.. _MLMM: https://sites.google.com/site/vincentosegura/mlmm + </help> + +</tool> |
b |
diff -r 000000000000 -r 6b7107812931 mlmm/source_library/emma.mlmm.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mlmm/source_library/emma.mlmm.r Thu Jul 02 05:42:38 2015 -0400 |
[ |
b'@@ -0,0 +1,1274 @@\n+emma.kinship <- function(snps, method="additive", use="all") {\n+ n0 <- sum(snps==0,na.rm=TRUE)\n+ nh <- sum(snps==0.5,na.rm=TRUE)\n+ n1 <- sum(snps==1,na.rm=TRUE)\n+ nNA <- sum(is.na(snps))\n+\n+ stopifnot(n0+nh+n1+nNA == length(snps))\n+\n+ if ( method == "dominant" ) {\n+ flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))\n+ snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & (snps == 0.5)]\n+ }\n+ else if ( method == "recessive" ) {\n+ flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))\n+ snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & (snps == 0.5)]\n+ }\n+ else if ( ( method == "additive" ) && ( nh > 0 ) ) {\n+ dsnps <- snps\n+ rsnps <- snps\n+ flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))\n+ dsnps[!is.na(snps) & (snps==0.5)] <- flags[!is.na(snps) & (snps==0.5)]\n+ flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))\n+ rsnps[!is.na(snps) & (snps==0.5)] <- flags[!is.na(snps) & (snps==0.5)]\n+ snps <- rbind(dsnps,rsnps)\n+ }\n+\n+ if ( use == "all" ) {\n+ mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps))\n+ snps[is.na(snps)] <- mafs[is.na(snps)]\n+ }\n+ else if ( use == "complete.obs" ) {\n+ snps <- snps[rowSums(is.na(snps))==0,]\n+ }\n+\n+ n <- ncol(snps)\n+ K <- matrix(nrow=n,ncol=n)\n+ diag(K) <- 1\n+\n+ for(i in 2:n) {\n+ for(j in 1:(i-1)) {\n+ x <- snps[,i]*snps[,j] + (1-snps[,i])*(1-snps[,j])\n+ K[i,j] <- sum(x,na.rm=TRUE)/sum(!is.na(x))\n+ K[j,i] <- K[i,j]\n+ }\n+ }\n+ return(K)\n+}\n+\n+emma.eigen.L <- function(Z,K,complete=TRUE) {\n+ if ( is.null(Z) ) {\n+ return(emma.eigen.L.wo.Z(K))\n+ }\n+ else {\n+ return(emma.eigen.L.w.Z(Z,K,complete))\n+ }\n+}\n+\n+emma.eigen.L.wo.Z <- function(K) {\n+ eig <- eigen(K,symmetric=TRUE)\n+ return(list(values=eig$values,vectors=eig$vectors))\n+}\n+\n+emma.eigen.L.w.Z <- function(Z,K,complete=TRUE) {\n+ if ( complete == FALSE ) {\n+ vids <- colSums(Z)>0\n+ Z <- Z[,vids]\n+ K <- K[vids,vids]\n+ }\n+ eig <- eigen(K%*%crossprod(Z,Z),symmetric=FALSE,EISPACK=TRUE)\n+ return(list(values=eig$values,vectors=qr.Q(qr(Z%*%eig$vectors),complete=TRUE)))\n+}\n+\n+emma.eigen.R <- function(Z,K,X,complete=TRUE) {\n+ if ( ncol(X) == 0 ) {\n+ return(emma.eigen.L(Z,K))\n+ }\n+ else if ( is.null(Z) ) {\n+ return(emma.eigen.R.wo.Z(K,X))\n+ }\n+ else {\n+ return(emma.eigen.R.w.Z(Z,K,X,complete))\n+ }\n+}\n+\n+emma.eigen.R.wo.Z <- function(K, X) {\n+ n <- nrow(X)\n+ q <- ncol(X)\n+ S <- diag(n)-X%*%solve(crossprod(X,X))%*%t(X)\n+ eig <- eigen(S%*%(K+diag(1,n))%*%S,symmetric=TRUE)\n+ stopifnot(!is.complex(eig$values))\n+ return(list(values=eig$values[1:(n-q)]-1,vectors=eig$vectors[,1:(n-q)]))\n+}\n+\n+emma.eigen.R.w.Z <- function(Z, K, X, complete = TRUE) {\n+ if ( complete == FALSE ) {\n+ vids <- colSums(Z) > 0\n+ Z <- Z[,vids]\n+ K <- K[vids,vids]\n+ }\n+ n <- nrow(Z)\n+ t <- ncol(Z)\n+ q <- ncol(X)\n+ \n+ SZ <- Z - X%*%solve(crossprod(X,X))%*%crossprod(X,Z)\n+ eig <- eigen(K%*%crossprod(Z,SZ),symmetric=FALSE,EISPACK=TRUE)\n+ if ( is.complex(eig$values) ) {\n+ eig$values <- Re(eig$values)\n+ eig$vectors <- Re(eig$vectors) \n+ }\n+ qr.X <- qr.Q(qr(X))\n+ return(list(values=eig$values[1:(t-q)],\n+ vectors=qr.Q(qr(cbind(SZ%*%eig$vectors[,1:(t-q)],qr.X)),\n+ complete=TRUE)[,c(1:(t-q),(t+1):n)])) \n+}\n+\n+emma.delta.ML.LL.wo.Z <- function(logdelta, lambda, etas, xi) {\n+ n <- length(xi)\n+ delta <- exp(logdelta)\n+ return( 0.5*(n*(log(n/(2*pi))-1-log(sum((etas*etas)/(lambda+delta))))-sum(log(xi+delta))) ) \n+}\n+\n+emma.delta.ML.LL.w.Z <- function(logdelta, lambda, etas.1, xi.1, n, etas.2.sq ) {\n+ t <- length(xi.1)\n+ delta <- exp(logdelta)\n+# stopifnot(length(lambda) == length(etas.1))\n+ return( 0.5*(n*(log(n/(2*pi))-1-log(sum(etas.1*etas.1/(lambda+delta))+etas.2.sq/delta))-(sum(log(xi.1+delta))+(n-t)*logdelta)) )\n+}\n+\n+emma.delta.ML.dLL.wo.Z <- functio'..b',drop=FALSE]))\n+ if ( nv == t ) {\n+ eig.R1 = emma.eigen.R.w.Z(Z,K,X)\n+ } \n+ }\n+\n+ for(j in 1:g) {\n+ vrows <- !is.na(ys[j,])\n+ if ( nv == t ) {\n+ yv <- ys[j,vrows]\n+ nr <- sum(vrows)\n+ if ( is.null(Z) ) {\n+ if ( nr == n ) {\n+ REMLE <- emma.REMLE(yv,X,K,NULL,ngrids,llim,ulim,esp,eig.R1)\n+ U <- eig.L$vectors * matrix(sqrt(1/(eig.L$values+REMLE$delta)),n,n,byrow=TRUE) \n+ }\n+ else {\n+ eig.L0 <- emma.eigen.L.wo.Z(K[vrows,vrows,drop=FALSE])\n+ REMLE <- emma.REMLE(yv,X[vrows,,drop=FALSE],K[vrows,vrows,drop=FALSE],NULL,ngrids,llim,ulim,esp)\n+ U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE)\n+ }\n+ dfs[i,j] <- nr-q1\n+ }\n+ else {\n+ if ( nr == n ) {\n+ REMLE <- emma.REMLE(yv,X,K,Z,ngrids,llim,ulim,esp,eig.R1)\n+ U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),n-t)),n,n,byrow=TRUE) \n+ }\n+ else {\n+ vtids <- as.logical(colSums(Z[vrows,,drop=FALSE]))\n+ eig.L0 <- emma.eigen.L.w.Z(Z[vrows,vtids,drop=FALSE],K[vtids,vtids,drop=FALSE])\n+ REMLE <- emma.REMLE(yv,X[vrows,,drop=FALSE],K[vtids,vtids,drop=FALSE],Z[vrows,vtids,drop=FALSE],ngrids,llim,ulim,esp)\n+ U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-sum(vtids))),nr,nr,byrow=TRUE)\n+ }\n+ dfs[i,j] <- nr-q1\n+ }\n+\n+ yt <- crossprod(U,yv)\n+ Xt <- crossprod(U,X[vrows,,drop=FALSE])\n+ iXX <- solve(crossprod(Xt,Xt))\n+ beta <- iXX%*%crossprod(Xt,yt)\n+ if ( !ponly ) {\n+ vgs[i,j] <- REMLE$vg\n+ ves[i,j] <- REMLE$ve\n+ REMLs[i,j] <- REMLE$REML\n+ }\n+ stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg)\n+ }\n+ else {\n+ if ( is.null(Z) ) {\n+ vtids <- vrows & vids\n+ eig.L0 <- emma.eigen.L.wo.Z(K[vtids,vtids,drop=FALSE])\n+ yv <- ys[j,vtids]\n+ nr <- sum(vtids)\n+ REMLE <- emma.REMLE(yv,X[vtids,,drop=FALSE],K[vtids,vtids,drop=FALSE],NULL,ngrids,llim,ulim,esp)\n+ U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE)\n+ Xt <- crossprod(U,X[vtids,,drop=FALSE])\n+ dfs[i,j] <- nr-q1\n+ }\n+ else {\n+ vtids <- as.logical(colSums(Z[vrows,,drop=FALSE])) & vids\n+ vtrows <- vrows & as.logical(rowSums(Z[,vids,drop=FALSE]))\n+ eig.L0 <- emma.eigen.L.w.Z(Z[vtrows,vtids,drop=FALSE],K[vtids,vtids,drop=FALSE])\n+ yv <- ys[j,vtrows]\n+ nr <- sum(vtrows)\n+ REMLE <- emma.REMLE(yv,X[vtrows,,drop=FALSE],K[vtids,vtids,drop=FALSE],Z[vtrows,vtids,drop=FALSE],ngrids,llim,ulim,esp)\n+ U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-sum(vtids))),nr,nr,byrow=TRUE)\n+ Xt <- crossprod(U,X[vtrows,,drop=FALSE])\n+ dfs[i,j] <- nr-q1\n+ }\n+ yt <- crossprod(U,yv)\n+ iXX <- solve(crossprod(Xt,Xt))\n+ beta <- iXX%*%crossprod(Xt,yt)\n+ if ( !ponly ) {\n+ vgs[i,j] <- REMLE$vg\n+ ves[i,j] <- REMLE$ve\n+ REMLs[i,j] <- REMLE$REML\n+ }\n+ stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg)\n+ \n+ }\n+ }\n+ ps[i,] <- 2*pt(abs(stats[i,]),dfs[i,],lower.tail=FALSE) \n+ }\n+ } \n+ }\n+ if ( ponly ) {\n+ return (ps)\n+ }\n+ else {\n+ return (list(ps=ps,REMLs=REMLs,stats=stats,dfs=dfs,vgs=vgs,ves=ves))\n+ }\n+}\n' |
b |
diff -r 000000000000 -r 6b7107812931 mlmm/source_library/emma.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mlmm/source_library/emma.r Thu Jul 02 05:42:38 2015 -0400 |
[ |
b'@@ -0,0 +1,1274 @@\n+emma.kinship <- function(snps, method="additive", use="all") {\n+ n0 <- sum(snps==0,na.rm=TRUE)\n+ nh <- sum(snps==0.5,na.rm=TRUE)\n+ n1 <- sum(snps==1,na.rm=TRUE)\n+ nNA <- sum(is.na(snps))\n+\n+ stopifnot(n0+nh+n1+nNA == length(snps))\n+\n+ if ( method == "dominant" ) {\n+ flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))\n+ snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & (snps == 0.5)]\n+ }\n+ else if ( method == "recessive" ) {\n+ flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))\n+ snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & (snps == 0.5)]\n+ }\n+ else if ( ( method == "additive" ) && ( nh > 0 ) ) {\n+ dsnps <- snps\n+ rsnps <- snps\n+ flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))\n+ dsnps[!is.na(snps) & (snps==0.5)] <- flags[!is.na(snps) & (snps==0.5)]\n+ flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))\n+ rsnps[!is.na(snps) & (snps==0.5)] <- flags[!is.na(snps) & (snps==0.5)]\n+ snps <- rbind(dsnps,rsnps)\n+ }\n+\n+ if ( use == "all" ) {\n+ mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps))\n+ snps[is.na(snps)] <- mafs[is.na(snps)]\n+ }\n+ else if ( use == "complete.obs" ) {\n+ snps <- snps[rowSums(is.na(snps))==0,]\n+ }\n+\n+ n <- ncol(snps)\n+ K <- matrix(nrow=n,ncol=n)\n+ diag(K) <- 1\n+\n+ for(i in 2:n) {\n+ for(j in 1:(i-1)) {\n+ x <- snps[,i]*snps[,j] + (1-snps[,i])*(1-snps[,j])\n+ K[i,j] <- sum(x,na.rm=TRUE)/sum(!is.na(x))\n+ K[j,i] <- K[i,j]\n+ }\n+ }\n+ return(K)\n+}\n+\n+emma.eigen.L <- function(Z,K,complete=TRUE) {\n+ if ( is.null(Z) ) {\n+ return(emma.eigen.L.wo.Z(K))\n+ }\n+ else {\n+ return(emma.eigen.L.w.Z(Z,K,complete))\n+ }\n+}\n+\n+emma.eigen.L.wo.Z <- function(K) {\n+ eig <- eigen(K,symmetric=TRUE)\n+ return(list(values=eig$values,vectors=eig$vectors))\n+}\n+\n+emma.eigen.L.w.Z <- function(Z,K,complete=TRUE) {\n+ if ( complete == FALSE ) {\n+ vids <- colSums(Z)>0\n+ Z <- Z[,vids]\n+ K <- K[vids,vids]\n+ }\n+ eig <- eigen(K%*%crossprod(Z,Z),symmetric=FALSE,EISPACK=TRUE)\n+ return(list(values=eig$values,vectors=qr.Q(qr(Z%*%eig$vectors),complete=TRUE)))\n+}\n+\n+emma.eigen.R <- function(Z,K,X,complete=TRUE) {\n+ if ( ncol(X) == 0 ) {\n+ return(emma.eigen.L(Z,K))\n+ }\n+ else if ( is.null(Z) ) {\n+ return(emma.eigen.R.wo.Z(K,X))\n+ }\n+ else {\n+ return(emma.eigen.R.w.Z(Z,K,X,complete))\n+ }\n+}\n+\n+emma.eigen.R.wo.Z <- function(K, X) {\n+ n <- nrow(X)\n+ q <- ncol(X)\n+ S <- diag(n)-X%*%solve(crossprod(X,X))%*%t(X)\n+ eig <- eigen(S%*%(K+diag(1,n))%*%S,symmetric=TRUE)\n+ stopifnot(!is.complex(eig$values))\n+ return(list(values=eig$values[1:(n-q)]-1,vectors=eig$vectors[,1:(n-q)]))\n+}\n+\n+emma.eigen.R.w.Z <- function(Z, K, X, complete = TRUE) {\n+ if ( complete == FALSE ) {\n+ vids <- colSums(Z) > 0\n+ Z <- Z[,vids]\n+ K <- K[vids,vids]\n+ }\n+ n <- nrow(Z)\n+ t <- ncol(Z)\n+ q <- ncol(X)\n+ \n+ SZ <- Z - X%*%solve(crossprod(X,X))%*%crossprod(X,Z)\n+ eig <- eigen(K%*%crossprod(Z,SZ),symmetric=FALSE,EISPACK=TRUE)\n+ if ( is.complex(eig$values) ) {\n+ eig$values <- Re(eig$values)\n+ eig$vectors <- Re(eig$vectors) \n+ }\n+ qr.X <- qr.Q(qr(X))\n+ return(list(values=eig$values[1:(t-q)],\n+ vectors=qr.Q(qr(cbind(SZ%*%eig$vectors[,1:(t-q)],qr.X)),\n+ complete=TRUE)[,c(1:(t-q),(t+1):n)])) \n+}\n+\n+emma.delta.ML.LL.wo.Z <- function(logdelta, lambda, etas, xi) {\n+ n <- length(xi)\n+ delta <- exp(logdelta)\n+ return( 0.5*(n*(log(n/(2*pi))-1-log(sum((etas*etas)/(lambda+delta))))-sum(log(xi+delta))) ) \n+}\n+\n+emma.delta.ML.LL.w.Z <- function(logdelta, lambda, etas.1, xi.1, n, etas.2.sq ) {\n+ t <- length(xi.1)\n+ delta <- exp(logdelta)\n+# stopifnot(length(lambda) == length(etas.1))\n+ return( 0.5*(n*(log(n/(2*pi))-1-log(sum(etas.1*etas.1/(lambda+delta))+etas.2.sq/delta))-(sum(log(xi.1+delta))+(n-t)*logdelta)) )\n+}\n+\n+emma.delta.ML.dLL.wo.Z <- functio'..b',drop=FALSE]))\n+ if ( nv == t ) {\n+ eig.R1 = emma.eigen.R.w.Z(Z,K,X)\n+ } \n+ }\n+\n+ for(j in 1:g) {\n+ vrows <- !is.na(ys[j,])\n+ if ( nv == t ) {\n+ yv <- ys[j,vrows]\n+ nr <- sum(vrows)\n+ if ( is.null(Z) ) {\n+ if ( nr == n ) {\n+ REMLE <- emma.REMLE(yv,X,K,NULL,ngrids,llim,ulim,esp,eig.R1)\n+ U <- eig.L$vectors * matrix(sqrt(1/(eig.L$values+REMLE$delta)),n,n,byrow=TRUE) \n+ }\n+ else {\n+ eig.L0 <- emma.eigen.L.wo.Z(K[vrows,vrows,drop=FALSE])\n+ REMLE <- emma.REMLE(yv,X[vrows,,drop=FALSE],K[vrows,vrows,drop=FALSE],NULL,ngrids,llim,ulim,esp)\n+ U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE)\n+ }\n+ dfs[i,j] <- nr-q1\n+ }\n+ else {\n+ if ( nr == n ) {\n+ REMLE <- emma.REMLE(yv,X,K,Z,ngrids,llim,ulim,esp,eig.R1)\n+ U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),n-t)),n,n,byrow=TRUE) \n+ }\n+ else {\n+ vtids <- as.logical(colSums(Z[vrows,,drop=FALSE]))\n+ eig.L0 <- emma.eigen.L.w.Z(Z[vrows,vtids,drop=FALSE],K[vtids,vtids,drop=FALSE])\n+ REMLE <- emma.REMLE(yv,X[vrows,,drop=FALSE],K[vtids,vtids,drop=FALSE],Z[vrows,vtids,drop=FALSE],ngrids,llim,ulim,esp)\n+ U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-sum(vtids))),nr,nr,byrow=TRUE)\n+ }\n+ dfs[i,j] <- nr-q1\n+ }\n+\n+ yt <- crossprod(U,yv)\n+ Xt <- crossprod(U,X[vrows,,drop=FALSE])\n+ iXX <- solve(crossprod(Xt,Xt))\n+ beta <- iXX%*%crossprod(Xt,yt)\n+ if ( !ponly ) {\n+ vgs[i,j] <- REMLE$vg\n+ ves[i,j] <- REMLE$ve\n+ REMLs[i,j] <- REMLE$REML\n+ }\n+ stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg)\n+ }\n+ else {\n+ if ( is.null(Z) ) {\n+ vtids <- vrows & vids\n+ eig.L0 <- emma.eigen.L.wo.Z(K[vtids,vtids,drop=FALSE])\n+ yv <- ys[j,vtids]\n+ nr <- sum(vtids)\n+ REMLE <- emma.REMLE(yv,X[vtids,,drop=FALSE],K[vtids,vtids,drop=FALSE],NULL,ngrids,llim,ulim,esp)\n+ U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE)\n+ Xt <- crossprod(U,X[vtids,,drop=FALSE])\n+ dfs[i,j] <- nr-q1\n+ }\n+ else {\n+ vtids <- as.logical(colSums(Z[vrows,,drop=FALSE])) & vids\n+ vtrows <- vrows & as.logical(rowSums(Z[,vids,drop=FALSE]))\n+ eig.L0 <- emma.eigen.L.w.Z(Z[vtrows,vtids,drop=FALSE],K[vtids,vtids,drop=FALSE])\n+ yv <- ys[j,vtrows]\n+ nr <- sum(vtrows)\n+ REMLE <- emma.REMLE(yv,X[vtrows,,drop=FALSE],K[vtids,vtids,drop=FALSE],Z[vtrows,vtids,drop=FALSE],ngrids,llim,ulim,esp)\n+ U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-sum(vtids))),nr,nr,byrow=TRUE)\n+ Xt <- crossprod(U,X[vtrows,,drop=FALSE])\n+ dfs[i,j] <- nr-q1\n+ }\n+ yt <- crossprod(U,yv)\n+ iXX <- solve(crossprod(Xt,Xt))\n+ beta <- iXX%*%crossprod(Xt,yt)\n+ if ( !ponly ) {\n+ vgs[i,j] <- REMLE$vg\n+ ves[i,j] <- REMLE$ve\n+ REMLs[i,j] <- REMLE$REML\n+ }\n+ stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg)\n+ \n+ }\n+ }\n+ ps[i,] <- 2*pt(abs(stats[i,]),dfs[i,],lower.tail=FALSE) \n+ }\n+ } \n+ }\n+ if ( ponly ) {\n+ return (ps)\n+ }\n+ else {\n+ return (list(ps=ps,REMLs=REMLs,stats=stats,dfs=dfs,vgs=vgs,ves=ves))\n+ }\n+}\n' |
b |
diff -r 000000000000 -r 6b7107812931 mlmm/source_library/mlmm.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mlmm/source_library/mlmm.r Thu Jul 02 05:42:38 2015 -0400 |
[ |
b"@@ -0,0 +1,477 @@\n+##############################################################################################################################################\r\n+###MLMM - Multi-Locus Mixed Model\r\n+###SET OF FUNCTIONS TO CARRY GWAS CORRECTING FOR POPULATION STRUCTURE WHILE INCLUDING COFACTORS THROUGH A STEPWISE-REGRESSION APPROACH\r\n+#######\r\n+#\r\n+##note: require EMMA\r\n+#library(emma)\r\n+#source('emma.r')\r\n+#\r\n+##REQUIRED DATA & FORMAT\r\n+#\r\n+#PHENOTYPE - Y: a vector of length m, with names(Y)=individual names\r\n+#GENOTYPE - X: a n by m matrix, where n=number of individuals, m=number of SNPs, with rownames(X)=individual names, and colnames(X)=SNP names\r\n+#KINSHIP - K: a n by n matrix, with rownames(K)=colnames(K)=individual names\r\n+#each of these data being sorted in the same way, according to the individual name\r\n+#\r\n+##FOR PLOTING THE GWAS RESULTS\r\n+#SNP INFORMATION - snp_info: a data frame having at least 3 columns:\r\n+# - 1 named 'SNP', with SNP names (same as colnames(X)),\r\n+# - 1 named 'Chr', with the chromosome number to which belong each SNP\r\n+# - 1 named 'Pos', with the position of the SNP onto the chromosome it belongs to.\r\n+#######\r\n+#\r\n+##FUNCTIONS USE\r\n+#save this file somewhere on your computer and source it!\r\n+#source('path/mlmm.r')\r\n+#\r\n+###FORWARD + BACKWARD ANALYSES\r\n+#mygwas<-mlmm(Y,X,K,nbchunks,maxsteps)\r\n+#X,Y,K as described above\r\n+#nbchunks: an integer defining the number of chunks of X to run the analysis, allows to decrease the memory usage ==> minimum=2, increase it if you do not have enough memory\r\n+#maxsteps: maximum number of steps desired in the forward approach. The forward approach breaks automatically once the pseudo-heritability is close to 0,\r\n+#\t\t\thowever to avoid doing too many steps in case the pseudo-heritability does not reach a value close to 0, this parameter is also used.\r\n+#\t\t\tIt's value must be specified as an integer >= 3\r\n+#\r\n+###RESULTS\r\n+#\r\n+##STEPWISE TABLE\r\n+#mygwas$step_table\r\n+#\r\n+##PLOTS\r\n+#\r\n+##PLOTS FORM THE FORWARD TABLE\r\n+#plot_step_table(mygwas,type=c('h2','maxpval','BIC','extBIC'))\r\n+#\r\n+##RSS PLOT\r\n+#plot_step_RSS(mygwas)\r\n+#\r\n+##GWAS MANHATTAN PLOTS\r\n+#\r\n+#FORWARD STEPS\r\n+#plot_fwd_GWAS(mygwas,step,snp_info,pval_filt)\r\n+#step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#\r\n+#OPTIMAL MODELS\r\n+#Automatic identification of the optimal models within the forwrad-backward models according to the extendedBIC or multiple-bonferonni criteria\r\n+#\r\n+#plot_opt_GWAS(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#\r\n+##GWAS MANHATTAN PLOT ZOOMED IN A REGION OF INTEREST\r\n+#plot_fwd_region(mygwas,step,snp_info,pval_filt,chrom,pos1,pos2)\r\n+#step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#chrom is an integer specifying the chromosome on which the region of interest is\r\n+#pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info\r\n+#\r\n+#plot_opt_region(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt,chrom,pos1,pos2)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#chrom is an integer specifying the chromosome on which the region of interest is\r\n+#pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info\r\n+#\r\n+##QQPLOTS of pvalues\r\n+#qqplot_fwd_GWAS(mygwas,nsteps)\r\n+#nsteps=maximum number of forward steps to be displayed\r\n+#\r\n+#qqplot_opt_GWAS(mygwas,opt=c('extBIC"..b'ok$Chr),min)$x+aggregate(output_ok$xpos,list(output_ok$Chr),max)$x)/2\r\n+\r\n+\tplot(output_ok$xpos,-log10(output_ok$pval),col=output_ok$col,pch=20,ylab=\'-log10(pval)\',xaxt=\'n\',xlab=\'chromosome\')\r\n+\taxis(1,tick=FALSE,at=d,labels=c(1:max(unique(output_ok$Chr))))\r\n+\tabline(h=x$bonf_thresh,lty=3,col=\'black\')}\r\n+\r\n+plot_region<-function(x,chrom,pos1,pos2){\r\n+\tregion<-subset(x$out,Chr==chrom & Pos>=pos1 & Pos <=pos2)\r\n+\tregion$col<- if (chrom %% 2 == 0) {\'gray60\'} else {\'gray10\'}\r\n+\tregion$col[which(region$SNP %in% x$cof)]<-\'red\'\r\n+\tplot(region$Pos,-log10(region$pval),type=\'p\',pch=20,main=paste(\'chromosome\',chrom,sep=\'\'),xlab=\'position (bp)\',ylab=\'-log10(pval)\',col=region$col,xlim=c(pos1,pos2))\r\n+\tabline(h=x$bonf_thresh,lty=3,col=\'black\')}\r\n+\r\n+\r\n+plot_fwd_GWAS<-function(x,step,snp_info,pval_filt) {\r\n+\tstopifnot(step<=length(x$pval_step))\r\n+\toutput<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by=\'SNP\'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh)\r\n+\tplot_GWAS(output)}\r\n+\r\n+plot_fwd_region<-function(x,step,snp_info,pval_filt,chrom,pos1,pos2) {\r\n+\tstopifnot(step<=length(x$pval_step))\r\n+\toutput<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by=\'SNP\'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh)\r\n+\tplot_region(output,chrom,pos1,pos2)}\r\n+\r\n+\r\n+plot_opt_GWAS<-function(x,opt,snp_info,pval_filt) {\r\n+\tif (opt==\'extBIC\') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_GWAS(output)}\r\n+\telse if (opt==\'mbonf\') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_GWAS(output)}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+plot_opt_region<-function(x,opt,snp_info,pval_filt,chrom,pos1,pos2) {\r\n+\tif (opt==\'extBIC\') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_region(output,chrom,pos1,pos2)}\r\n+\telse if (opt==\'mbonf\') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_region(output,chrom,pos1,pos2)}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+\r\n+qqplot_fwd_GWAS<-function(x,nsteps){\r\n+\tstopifnot(nsteps<=length(x$pval_step))\r\n+\te<--log10(ppoints(nrow(x$pval_step[[1]]$out)))\r\n+\tostep<-list()\r\n+\tostep[[1]]<--log10(sort(x$pval_step[[1]]$out$pval))\r\n+\tfor (i in 2:nsteps) {ostep[[i]]<--log10(sort(x$pval_step[[i]]$out$pval))}\r\n+\r\n+\tmaxp<-ceiling(max(unlist(ostep)))\r\n+\r\n+\tplot(e,ostep[[1]],type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp))\r\n+\tabline(0,1,col="dark grey")\r\n+\r\n+\tfor (i in 2:nsteps) {\r\n+\tpar(new=T)\r\n+\tplot(e,ostep[[i]],type=\'b\',pch=20,cex=0.8,col=i,axes=\'F\',xlab=\'\',ylab=\'\',xlim=c(0,max(e)+1),ylim=c(0,maxp))}\r\n+\tlegend(0,maxp,lty=1,pch=20,col=c(1:length(ostep)),paste(c(0:(length(ostep)-1)),\'cof\',sep=\' \'))\r\n+}\r\n+\r\n+qqplot_opt_GWAS<-function(x,opt){\r\n+\tif (opt==\'extBIC\') {\r\n+\t\te<--log10(ppoints(nrow(x$opt_extBIC$out)))\r\n+\t\to<--log10(sort(x$opt_extBIC$out$pval))\r\n+\t\tmaxp<-ceiling(max(o))\r\n+\t\tplot(e,o,type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste(\'optimal model according to extBIC\'))\r\n+\t\tabline(0,1,col="dark grey")}\r\n+\telse if (opt==\'mbonf\') {\r\n+\t\te<--log10(ppoints(nrow(x$opt_mbonf$out)))\r\n+\t\to<--log10(sort(x$opt_mbonf$out$pval))\r\n+\t\tmaxp<-ceiling(max(o))\r\n+\t\tplot(e,o,type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste(\'optimal model according to mbonf\'))\r\n+\t\tabline(0,1,col="dark grey")}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+\r\n' |
b |
diff -r 000000000000 -r 6b7107812931 mlmm/source_library/mlmm1.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mlmm/source_library/mlmm1.r Thu Jul 02 05:42:38 2015 -0400 |
[ |
b"@@ -0,0 +1,486 @@\n+##############################################################################################################################################\r\n+###MLMM - Multi-Locus Mixed Model\r\n+###SET OF FUNCTIONS TO CARRY GWAS CORRECTING FOR POPULATION STRUCTURE WHILE INCLUDING COFACTORS THROUGH A STEPWISE-REGRESSION APPROACH\r\n+#######\r\n+#\r\n+##note: require EMMA\r\n+#library(emma)\r\n+#source('emma.r')\r\n+#\r\n+##REQUIRED DATA & FORMAT\r\n+#\r\n+#PHENOTYPE - Y: a vector of length m, with names(Y)=individual names\r\n+#GENOTYPE - X: a n by m matrix, where n=number of individuals, m=n umber of SNPs, with rownames(X)=individual names, and colnames(X)=SNP names\r\n+#KINSHIP - K: a n by n matrix, with rownames(K)=colnames(K)=individual names\r\n+#each of these data being sorted in the same way, according to the individual name\r\n+#\r\n+##FOR PLOTING THE GWAS RESULTS\r\n+#SNP INFORMATION - snp_info: a data frame having at least 3 columns:\r\n+# - 1 named 'SNP', with SNP names (same as colnames(X)),\r\n+# - 1 named 'Chr', with the chromosome number to which belong each SNP\r\n+# - 1 named 'Pos', with the position of the SNP onto the chromosome it belongs to.\r\n+#######\r\n+#\r\n+##FUNCTIONS USE\r\n+#save this file somewhere on your computer and source it!\r\n+#source('path/mlmm.r')\r\n+#\r\n+###FORWARD + BACKWARD ANALYSES\r\n+#mygwas<-mlmm(Y,X,K,nbchunks,maxsteps)\r\n+#X,Y,K as described above\r\n+#nbchunks: an integer defining the number of chunks of to run the analysis, allows to decrease the memory usage ==> minimum=2, increase it if you do not have enough memory\r\n+#maxsteps: maximum number of steps desired in the forward approach. The forward approach breaks automatically once the pseudo-heritability is close to 0,\r\n+#\t\t\thowever to avoid doing too many steps in case the pseudo-heritability does not reach a value close to 0, this parameter is also used.\r\n+#\t\t\tIt's value must be specified as an integer >= 3\r\n+#\r\n+###RESULTS\r\n+#\r\n+##STEPWISE TABLE\r\n+#mygwas$step_table\r\n+#\r\n+##PLOTS\r\n+#\r\n+##PLOTS FORM THE FORWARD TABLE\r\n+#plot_step_table(mygwas,type=c('h2','maxpval','BIC','extBIC'))\r\n+#\r\n+##RSS PLOT\r\n+#plot_step_RSS(mygwas)\r\n+#\r\n+##GWAS MANHATTAN PLOTS\r\n+#\r\n+#FORWARD STEPS\r\n+#plot_fwd_GWAS(mygwas,step,snp_info,pval_filt)\r\n+#step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#\r\n+#OPTIMAL MODELS\r\n+#Automatic identification of the optimal models within the forwrad-backward models according to the extendedBIC or multiple-bonferonni criteria\r\n+#\r\n+#plot_opt_GWAS(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#\r\n+##GWAS MANHATTAN PLOT ZOOMED IN A REGION OF INTEREST\r\n+#plot_fwd_region(mygwas,step,snp_info,pval_filt,chrom,pos1,pos2)\r\n+#step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#chrom is an integer specifying the chromosome on which the region of interest is\r\n+#pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info\r\n+#\r\n+#plot_opt_region(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt,chrom,pos1,pos2)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#chrom is an integer specifying the chromosome on which the region of interest is\r\n+#pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info\r\n+#\r\n+##QQPLOTS of pvalues\r\n+#qqplot_fwd_GWAS(mygwas,nsteps)\r\n+#nsteps=maximum number of forward steps to be displayed\r\n+#\r\n+#qqplot_opt_GWAS(mygwas,opt=c('extBIC'"..b'output_ok$pval) > x$bonf_thresh]), output_ok$SNP[-log10(output_ok$pval) > x$bonf_thresh], pos=3, cex=0.7) \r\n+ legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" "))\r\n+ } else {\r\n+ legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" "))\r\n+ }\r\n+}\r\n+ \r\n+plot_region<-function(x,chrom,pos1,pos2){\r\n+\tregion<-subset(x$out,Chr==chrom & Pos>=pos1 & Pos <=pos2)\r\n+\tregion$col<- if (chrom %% 2 == 0) {\'gray60\'} else {\'gray10\'}\r\n+\tregion$col[which(region$SNP %in% x$cof)]<-\'red\'\r\n+\tplot(region$Pos,-log10(region$pval),type=\'p\',pch=20,main=paste(\'chromosome\',chrom,sep=\'\'),xlab=\'position (bp)\',ylab=\'-log10(pval)\',col=region$col,xlim=c(pos1,pos2))\r\n+\tabline(h=x$bonf_thresh,lty=3,col=\'black\')}\r\n+\r\n+\r\n+plot_fwd_GWAS<-function(x,step,snp_info,pval_filt) {\r\n+\tstopifnot(step<=length(x$pval_step))\r\n+\toutput<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by=\'SNP\'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh)\r\n+\tplot_GWAS(output)}\r\n+\r\n+plot_fwd_region<-function(x,step,snp_info,pval_filt,chrom,pos1,pos2) {\r\n+\tstopifnot(step<=length(x$pval_step))\r\n+\toutput<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by=\'SNP\'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh)\r\n+\tplot_region(output,chrom,pos1,pos2)}\r\n+\r\n+\r\n+plot_opt_GWAS<-function(x,opt,snp_info,pval_filt) {\r\n+\tif (opt==\'extBIC\') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_GWAS(output)}\r\n+\telse if (opt==\'mbonf\') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_GWAS(output)}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+plot_opt_region<-function(x,opt,snp_info,pval_filt,chrom,pos1,pos2) {\r\n+\tif (opt==\'extBIC\') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_region(output,chrom,pos1,pos2)}\r\n+\telse if (opt==\'mbonf\') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_region(output,chrom,pos1,pos2)}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+\r\n+qqplot_fwd_GWAS<-function(x,nsteps){\r\n+\tstopifnot(nsteps<=length(x$pval_step))\r\n+\te<--log10(ppoints(nrow(x$pval_step[[1]]$out)))\r\n+\tostep<-list()\r\n+\tostep[[1]]<--log10(sort(x$pval_step[[1]]$out$pval))\r\n+\tfor (i in 2:nsteps) {ostep[[i]]<--log10(sort(x$pval_step[[i]]$out$pval))}\r\n+\r\n+\tmaxp<-ceiling(max(unlist(ostep)))\r\n+\r\n+\tplot(e,ostep[[1]],type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp))\r\n+\tabline(0,1,col="dark grey")\r\n+\r\n+\tfor (i in 2:nsteps) {\r\n+\tpar(new=T)\r\n+\tplot(e,ostep[[i]],type=\'b\',pch=20,cex=0.8,col=i,axes=\'F\',xlab=\'\',ylab=\'\',xlim=c(0,max(e)+1),ylim=c(0,maxp))}\r\n+\tlegend(0,maxp,lty=1,pch=20,col=c(1:length(ostep)),paste(c(0:(length(ostep)-1)),\'cof\',sep=\' \'))\r\n+}\r\n+\r\n+qqplot_opt_GWAS<-function(x,opt){\r\n+\tif (opt==\'extBIC\') {\r\n+\t\te<--log10(ppoints(nrow(x$opt_extBIC$out)))\r\n+\t\to<--log10(sort(x$opt_extBIC$out$pval))\r\n+\t\tmaxp<-ceiling(max(o))\r\n+\t\tplot(e,o,type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste(\'optimal model according to extBIC\'))\r\n+\t\tabline(0,1,col="dark grey")}\r\n+\telse if (opt==\'mbonf\') {\r\n+\t\te<--log10(ppoints(nrow(x$opt_mbonf$out)))\r\n+\t\to<--log10(sort(x$opt_mbonf$out$pval))\r\n+\t\tmaxp<-ceiling(max(o))\r\n+\t\tplot(e,o,type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste(\'optimal model according to mbonf\'))\r\n+\t\tabline(0,1,col="dark grey")}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+\r\n' |
b |
diff -r 000000000000 -r 6b7107812931 mlmm/source_library/plot_MLMM.Ago.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mlmm/source_library/plot_MLMM.Ago.r Thu Jul 02 05:42:38 2015 -0400 |
[ |
@@ -0,0 +1,36 @@ +## fonction ########################################################################### +plot_MLMM.Ago<-function(x) { + output1 <-x$out[order(x$out$Pos),] + output_ok<-output1[order(output1$Chr),] + + maxpos<-c(0,cumsum(as.numeric(aggregate(output_ok$Pos,list(output_ok$Chr),max)$x+max(cumsum(as.numeric(aggregate(output_ok$Pos,list(output_ok$Chr),max)$x)))/200))) + plot_col<-rep(c('gray10','gray60'),ceiling(max(unique(output_ok$Chr))/2)) +# plot_col<-c('blue','darkgreen','red','cyan','purple') + size<-aggregate(output_ok$Pos,list(output_ok$Chr),length)$x + a<-rep(maxpos[1],size[1]) + b<-rep(plot_col[1],size[1]) + for (i in 2:max(unique(output_ok$Chr))){ + a<-c(a,rep(maxpos[i],size[i])) + b<-c(b,rep(plot_col[i],size[i]))} + + output_ok$xpos<-output_ok$Pos+a + output_ok$col<-b + output_ok$col[output_ok$mk=='qtl']<-'cyan' + output_ok$col[output_ok$SNP %in% x$cof]<-'red' + + d<-(aggregate(output_ok$xpos,list(output_ok$Chr),min)$x+aggregate(output_ok$xpos,list(output_ok$Chr),max)$x)/2 + + plot(output_ok$xpos,-log10(output_ok$pval),col=output_ok$col,pch=20,ylab='-log10(pval)',xaxt='n',xlab='chromosome') + axis(1,tick=FALSE,at=d,labels=c(1:max(unique(output_ok$Chr)))) + xline(output_ok$xpos[output_ok$mk=='qtl'], col='cyan', lwd=0.1) + abline(h=x$bonf_thresh,lty=3,col='black') + + + if (length(output_ok$pval[-log10(output_ok$pval) > x$bonf_thresh]) > 0) { + text(output_ok$xpos[-log10(output_ok$pval) > x$bonf_thresh], -log10(output_ok$pval[-log10(output_ok$pval) > x$bonf_thresh]), output_ok$SNP[-log10(output_ok$pval) > x$bonf_thresh], pos=3, cex=0.7) + legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" ")) + } else { + legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" ")) + } + +} |