Repository 'mlmm'
hg clone https://toolshed.g2.bx.psu.edu/repos/dereeper/mlmm

Changeset 0:6b7107812931 (2015-07-02)
Next changeset 1:380b364980f9 (2018-04-16)
Commit message:
Uploaded
added:
mlmm/MLMM.pl
mlmm/MLMM.sh
mlmm/MLMM.xml
mlmm/source_library/emma.mlmm.r
mlmm/source_library/emma.r
mlmm/source_library/mlmm.r
mlmm/source_library/mlmm1.r
mlmm/source_library/plot_MLMM.Ago.r
b
diff -r 000000000000 -r 6b7107812931 mlmm/MLMM.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mlmm/MLMM.pl Thu Jul 02 05:42:38 2015 -0400
[
@@ -0,0 +1,155 @@
+#!/usr/bin/perl
+
+use strict;
+use Switch;
+use Getopt::Long;
+use Bio::SeqIO;
+
+
+my $usage = qq~Usage:$0 <args> [<opts>]
+where <args> are:
+    -g, --geno         <Genotype input>
+    -i, --info         <SNP information. Genome position.>
+    -p, --pheno        <Phenotype input>
+    -o, --out          <output name>
+    -d, --directory    <directory for MLMM R libraries>
+    -s, --step_number  <number of steps. Maximum: 20. Default: 10>
+    -m, --method       <Method: mbonf or extBIC. Default: mbonf>
+~;
+$usage .= "\n";
+
+my ($geno,$map,$pheno,$out,$dir,$steps,$method);
+
+
+GetOptions(
+ "geno=s"      => \$geno,
+ "info=s"      => \$map,
+ "pheno=s"     => \$pheno,
+ "out=s"       => \$out,
+ "dir=s"       => \$dir,
+ "steps=s"     => \$steps,
+ "method=s"    => \$method
+);
+
+
+die $usage
+  if ( !$geno || !$map || !$pheno || !$out || !$dir || !$steps || !$method);
+
+my $max_steps = 10;
+my $plot_opt = "mbonf";
+if ($method && $method ne 'mbonf' && $method ne 'extBIC')
+{
+ print "Aborted: Method must be mbonf or extBIC.\n";
+ exit;
+}
+else
+{
+ $plot_opt = $method;
+}
+if ($steps && $steps !~/\d+/ && $steps > 20 && $steps < 2)
+{
+ print "Aborted: Number of steps must be greater than 2 and lower than 20.\n";
+ exit;
+}
+else
+{
+ $max_steps = $steps;
+}
+
+
+my $chunk = 2;
+
+my $RSCRIPT_EXE = "Rscript";
+my $R_DIR = $dir;
+
+
+my $head_trait = `head -1 $pheno`;
+my @headers_traits = split(/\t/,$head_trait);
+my $trait_name = $headers_traits[1];
+
+
+open( my $RCMD, ">rscript" ) or throw Error::Simple($!);
+
+
+
+
+print $RCMD "Y_file <- \"" . $pheno . "\"\n";
+print $RCMD "X_file <- \"" . $geno . "\"\n";
+if($map)
+{
+ print $RCMD "map_file <- \"$map\"\n";
+ print $RCMD "map <- read.table(map_file, sep = \"\\t\", header = T)\n";
+}
+print $RCMD "mlmm_data = list()\n";
+print $RCMD "mlmm_data\$chunk <- $chunk\n";
+print $RCMD "mlmm_data\$maxsteps <- $max_steps\n";
+print $RCMD "genot <- read.table(X_file, sep = \"\\t\", header = T)\n";
+print $RCMD "genot_mat <- as.matrix(genot[, 2:ncol(genot)])\n";
+print $RCMD "rownames(genot_mat) <- genot\$Ind_id\n";
+
+print $RCMD "phenot <- read.table(Y_file, sep = \"\\t\", header = T)\n";
+
+
+
+# missing data imputation
+print $RCMD "genot_imp <- genot_mat\n";
+print $RCMD "average <- colMeans(genot_imp, na.rm = T)\n";
+print $RCMD "for (i in 1:ncol(genot_imp)){genot_imp[is.na(genot_imp[,i]), i] <- average[i]}\n";
+
+# kinship matrix computation
+print $RCMD "average <- colMeans(genot_imp, na.rm = T)\n";
+print $RCMD "stdev <- apply(genot_imp, 2, sd)\n";
+print $RCMD "genot_stand <- sweep(sweep(genot_imp, 2, average, \"-\"), 2, stdev, \"/\")\n";
+print $RCMD "K_mat <- (genot_stand %*% t(genot_stand)) / ncol(genot_stand)\n";
+print $RCMD "write.table(K_mat, '$out.kinship', sep='\\t', dec='.', quote=F, col.names=T, row.names=T)\n";
+
+print $RCMD "source(\"" . $R_DIR. "/mlmm.r\")\n";
+print $RCMD "source(\"" . $R_DIR. "/emma.r\")\n";
+
+# mlmm
+print $RCMD "mygwas <- mlmm(Y = phenot\$$trait_name, X = genot_imp, K = K_mat, nbchunks=mlmm_data\$chunk, maxsteps=mlmm_data\$maxsteps)\n";
+
+# plots
+print $RCMD "pdf('$out.pdf')\n";
+print $RCMD "plot_step_table(mygwas, \"h2\")\n";
+print $RCMD "plot_step_table(mygwas, \"extBIC\")\n";
+print $RCMD "plot_step_table(mygwas, \"maxpval\")\n";
+print $RCMD "plot_step_RSS(mygwas)\n";
+# for (my $j = 1; $j <= ($max_steps - 1); $j++)
+# {
+ # print $RCMD "plot_fwd_GWAS(mygwas, step = $j, snp_info = map, pval_filt = 0.1)\n";
+# }
+print $RCMD "plot_opt_GWAS(mygwas, opt = \"extBIC\", snp_info = map, pval_filt = 0.1)\n";
+print $RCMD "plot_opt_GWAS(mygwas, opt = \"mbonf\", snp_info = map, pval_filt = 0.1)\n";
+#print $RCMD "qqplot_fwd_GWAS(mygwas, nsteps = mlmm_data\$maxsteps)\n";
+print $RCMD "qqplot_opt_GWAS(mygwas, opt = \"extBIC\")\n";
+print $RCMD "qqplot_opt_GWAS(mygwas, opt = \"mbonf\")\n";
+
+# outputs
+print $RCMD "write.table(mygwas\$RSSout, '$out.rss', sep='\\t', dec='.', quote=F, col.names=T, row.names=F)\n";
+print $RCMD "write.table(mygwas\$step_table, '$out.steptable', sep='\\t', dec='.', quote=F, col.names=T, row.names=F)\n";
+
+$plot_opt = "\$opt_" . $plot_opt;
+print $RCMD "pval = mygwas" . $plot_opt . "\$out\n";
+print $RCMD "colnames(pval) = c(\"Marker_name\", \"Pvalue\")\n";
+print $RCMD "info_tmp = map\n";
+print $RCMD "colnames(info_tmp) = c(\"Marker_name\", \"Chr\", \"Pos\")\n";
+print $RCMD "res_asso = pval\n";
+print $RCMD qq~
+if(exists("info_tmp")){
+ res_asso = merge(info_tmp, res_asso, by="Marker_name")
+ if( !is.element("Trait", colnames(info_tmp)) ){
+ m = matrix(data="traitname", ncol=1, nrow=nrow(res_asso), dimnames=list(c(), c("Trait")))
+ res_asso = cbind(m, res_asso)
+ }
+}
+~;
+print $RCMD "res_asso = res_asso[order(res_asso[, \"Trait\"], res_asso[, \"Chr\"], res_asso[, \"Pos\"]), ]\n";
+print $RCMD "write.table(res_asso, '$out.res_asso', sep='\t', dec='.', quote=F, col.names=T, row.names=F)\n";
+close($RCMD);
+
+system("$RSCRIPT_EXE --vanilla rscript");
+
+
+
+
b
diff -r 000000000000 -r 6b7107812931 mlmm/MLMM.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mlmm/MLMM.sh Thu Jul 02 05:42:38 2015 -0400
b
@@ -0,0 +1,28 @@
+#!/bin/bash
+geno=$1
+map=$2
+pheno=$3
+steps=$4
+method=$5
+output=$6
+pdf=$7
+kinship=$8
+rss=$9
+step_table=${10}
+log=${11}
+
+directory=`dirname $0`
+mkdir tmpdir$$
+cp -rf $geno tmpdir$$/geno
+cp -rf $map tmpdir$$/map
+cp -rf $pheno tmpdir$$/pheno

+perl $directory/MLMM.pl -g tmpdir$$/geno -i tmpdir$$/map -p tmpdir$$/pheno -s $steps -m $method -o tmpdir$$/output -d $directory/source_library >>$log 2>&1
+
+mv tmpdir$$/output.pdf $pdf
+mv tmpdir$$/output.kinship $kinship
+mv tmpdir$$/output.res_asso $output
+mv tmpdir$$/output.rss $rss
+mv tmpdir$$/output.steptable $step_table
+
+
b
diff -r 000000000000 -r 6b7107812931 mlmm/MLMM.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mlmm/MLMM.xml Thu Jul 02 05:42:38 2015 -0400
b
@@ -0,0 +1,61 @@
+<tool id="mlmm" name="MLMM" version="1.0">
+ <description>GWAS using Multi-Locus Mixed-Model (MLMM)</description>
+ <requirements>
+ <requirement type="binary">Rscript</requirement>
+ </requirements>
+ <command interpreter="bash">./MLMM.sh $geno $map $pheno $steps $method $output $pdf $kinship $rss $step_table $log
+    </command>
+ <inputs>
+ <param format="txt" name="geno" type="data" label="Genotype matrix" help="NxM, N = individuals in line, M = Markers in columns, Genotype coded in 0,1,2"/>
+ <param type="data" format="txt" name="map" label="SNP Information file" help="3 columns: SNP, Chrom, Pos"/>
+ <param format="txt" name="pheno" type="data" label="Phenotype matrix" help="NxT, N = individuals in line, T = Trait in columns (Phenot1, Phenot2...)"/>
+ <param type="text" name="steps" label="Maximum number of steps for the forward approach" value="10"/>
+ <param name="method" type="select">
+                        <option value="extBIC">EBIC</option>
+                        <option value="mbonf" selected="True">MBonf</option>
+                </param>
+ </inputs>
+ <outputs>
+ <data format="txt" name="output" label="Association results"/>
+ <data format="txt" name="kinship" label="Kinship matrix"/>
+ <data format="pdf" name="pdf" label="PDF Graphical outputs"/>
+ <data format="txt" name="rss" label="RSS"/>
+ <data format="txt" name="step_table" label="Step Table"/>
+ <data format="txt" name="log" label="Log file"/>
+ </outputs>
+ <help>
+
+
+.. class:: infomark
+
+**Program encapsulated in Galaxy by Southgreen**
+
+.. class:: infomark
+
+**MLMM version 1.0**
+
+-----
+
+==============
+ Please cite: 
+==============
+
+"An efficient multi-locus mixed-model approach for genome-wide association studies in structured populations.", **Segura V, Vilhjalmsson BJ, Platt A, Korte A, Seren U, Long Q, Nordborg M.**, Nature Genetics, 44: 825-830, 2012.
+
+-----
+
+===========
+ Overview:
+===========
+
+MLMM is an efficient multi-locus mixed-model approach for genome-wide association studies in structured populations.
+
+-----
+
+For further informations, please visite the MLMM_ website.
+
+
+.. _MLMM: https://sites.google.com/site/vincentosegura/mlmm
+ </help>
+
+</tool>
b
diff -r 000000000000 -r 6b7107812931 mlmm/source_library/emma.mlmm.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mlmm/source_library/emma.mlmm.r Thu Jul 02 05:42:38 2015 -0400
[
b'@@ -0,0 +1,1274 @@\n+emma.kinship <- function(snps, method="additive", use="all") {\n+  n0 <- sum(snps==0,na.rm=TRUE)\n+  nh <- sum(snps==0.5,na.rm=TRUE)\n+  n1 <- sum(snps==1,na.rm=TRUE)\n+  nNA <- sum(is.na(snps))\n+\n+  stopifnot(n0+nh+n1+nNA == length(snps))\n+\n+  if ( method == "dominant" ) {\n+    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))\n+    snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & (snps == 0.5)]\n+  }\n+  else if ( method == "recessive" ) {\n+    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))\n+    snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & (snps == 0.5)]\n+  }\n+  else if ( ( method == "additive" ) && ( nh > 0 ) ) {\n+    dsnps <- snps\n+    rsnps <- snps\n+    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))\n+    dsnps[!is.na(snps) & (snps==0.5)] <- flags[!is.na(snps) & (snps==0.5)]\n+    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))\n+    rsnps[!is.na(snps) & (snps==0.5)] <- flags[!is.na(snps) & (snps==0.5)]\n+    snps <- rbind(dsnps,rsnps)\n+  }\n+\n+  if ( use == "all" ) {\n+    mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps))\n+    snps[is.na(snps)] <- mafs[is.na(snps)]\n+  }\n+  else if ( use == "complete.obs" ) {\n+    snps <- snps[rowSums(is.na(snps))==0,]\n+  }\n+\n+  n <- ncol(snps)\n+  K <- matrix(nrow=n,ncol=n)\n+  diag(K) <- 1\n+\n+  for(i in 2:n) {\n+    for(j in 1:(i-1)) {\n+      x <- snps[,i]*snps[,j] + (1-snps[,i])*(1-snps[,j])\n+      K[i,j] <- sum(x,na.rm=TRUE)/sum(!is.na(x))\n+      K[j,i] <- K[i,j]\n+    }\n+  }\n+  return(K)\n+}\n+\n+emma.eigen.L <- function(Z,K,complete=TRUE) {\n+  if ( is.null(Z) ) {\n+    return(emma.eigen.L.wo.Z(K))\n+  }\n+  else {\n+    return(emma.eigen.L.w.Z(Z,K,complete))\n+  }\n+}\n+\n+emma.eigen.L.wo.Z <- function(K) {\n+  eig <- eigen(K,symmetric=TRUE)\n+  return(list(values=eig$values,vectors=eig$vectors))\n+}\n+\n+emma.eigen.L.w.Z <- function(Z,K,complete=TRUE) {\n+  if ( complete == FALSE ) {\n+    vids <- colSums(Z)>0\n+    Z <- Z[,vids]\n+    K <- K[vids,vids]\n+  }\n+  eig <- eigen(K%*%crossprod(Z,Z),symmetric=FALSE,EISPACK=TRUE)\n+  return(list(values=eig$values,vectors=qr.Q(qr(Z%*%eig$vectors),complete=TRUE)))\n+}\n+\n+emma.eigen.R <- function(Z,K,X,complete=TRUE) {\n+  if ( ncol(X) == 0 ) {\n+    return(emma.eigen.L(Z,K))\n+  }\n+  else if ( is.null(Z) ) {\n+    return(emma.eigen.R.wo.Z(K,X))\n+  }\n+  else {\n+    return(emma.eigen.R.w.Z(Z,K,X,complete))\n+  }\n+}\n+\n+emma.eigen.R.wo.Z <- function(K, X) {\n+  n <- nrow(X)\n+  q <- ncol(X)\n+  S <- diag(n)-X%*%solve(crossprod(X,X))%*%t(X)\n+  eig <- eigen(S%*%(K+diag(1,n))%*%S,symmetric=TRUE)\n+  stopifnot(!is.complex(eig$values))\n+  return(list(values=eig$values[1:(n-q)]-1,vectors=eig$vectors[,1:(n-q)]))\n+}\n+\n+emma.eigen.R.w.Z <- function(Z, K, X, complete = TRUE) {\n+  if ( complete == FALSE ) {\n+    vids <-  colSums(Z) > 0\n+    Z <- Z[,vids]\n+    K <- K[vids,vids]\n+  }\n+  n <- nrow(Z)\n+  t <- ncol(Z)\n+  q <- ncol(X)\n+  \n+  SZ <- Z - X%*%solve(crossprod(X,X))%*%crossprod(X,Z)\n+  eig <- eigen(K%*%crossprod(Z,SZ),symmetric=FALSE,EISPACK=TRUE)\n+  if ( is.complex(eig$values) ) {\n+    eig$values <- Re(eig$values)\n+    eig$vectors <- Re(eig$vectors)    \n+  }\n+  qr.X <- qr.Q(qr(X))\n+  return(list(values=eig$values[1:(t-q)],\n+              vectors=qr.Q(qr(cbind(SZ%*%eig$vectors[,1:(t-q)],qr.X)),\n+                complete=TRUE)[,c(1:(t-q),(t+1):n)]))   \n+}\n+\n+emma.delta.ML.LL.wo.Z <- function(logdelta, lambda, etas, xi) {\n+  n <- length(xi)\n+  delta <- exp(logdelta)\n+  return( 0.5*(n*(log(n/(2*pi))-1-log(sum((etas*etas)/(lambda+delta))))-sum(log(xi+delta))) )  \n+}\n+\n+emma.delta.ML.LL.w.Z <- function(logdelta, lambda, etas.1, xi.1, n, etas.2.sq ) {\n+  t <- length(xi.1)\n+  delta <- exp(logdelta)\n+#  stopifnot(length(lambda) == length(etas.1))\n+  return( 0.5*(n*(log(n/(2*pi))-1-log(sum(etas.1*etas.1/(lambda+delta))+etas.2.sq/delta))-(sum(log(xi.1+delta))+(n-t)*logdelta)) )\n+}\n+\n+emma.delta.ML.dLL.wo.Z <- functio'..b',drop=FALSE]))\n+          if ( nv == t ) {\n+            eig.R1 = emma.eigen.R.w.Z(Z,K,X)\n+          }          \n+        }\n+\n+        for(j in 1:g) {\n+          vrows <- !is.na(ys[j,])\n+          if ( nv == t ) {\n+            yv <- ys[j,vrows]\n+            nr <- sum(vrows)\n+            if ( is.null(Z) ) {\n+              if ( nr == n ) {\n+                REMLE <- emma.REMLE(yv,X,K,NULL,ngrids,llim,ulim,esp,eig.R1)\n+                U <- eig.L$vectors * matrix(sqrt(1/(eig.L$values+REMLE$delta)),n,n,byrow=TRUE)                \n+              }\n+              else {\n+                eig.L0 <- emma.eigen.L.wo.Z(K[vrows,vrows,drop=FALSE])\n+                REMLE <- emma.REMLE(yv,X[vrows,,drop=FALSE],K[vrows,vrows,drop=FALSE],NULL,ngrids,llim,ulim,esp)\n+                U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE)\n+              }\n+              dfs[i,j] <- nr-q1\n+            }\n+            else {\n+              if ( nr == n ) {\n+                REMLE <- emma.REMLE(yv,X,K,Z,ngrids,llim,ulim,esp,eig.R1)\n+                U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),n-t)),n,n,byrow=TRUE)                \n+              }\n+              else {\n+                vtids <- as.logical(colSums(Z[vrows,,drop=FALSE]))\n+                eig.L0 <- emma.eigen.L.w.Z(Z[vrows,vtids,drop=FALSE],K[vtids,vtids,drop=FALSE])\n+                REMLE <- emma.REMLE(yv,X[vrows,,drop=FALSE],K[vtids,vtids,drop=FALSE],Z[vrows,vtids,drop=FALSE],ngrids,llim,ulim,esp)\n+                U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-sum(vtids))),nr,nr,byrow=TRUE)\n+              }\n+              dfs[i,j] <- nr-q1\n+            }\n+\n+            yt <- crossprod(U,yv)\n+            Xt <- crossprod(U,X[vrows,,drop=FALSE])\n+            iXX <- solve(crossprod(Xt,Xt))\n+            beta <- iXX%*%crossprod(Xt,yt)\n+            if ( !ponly ) {\n+              vgs[i,j] <- REMLE$vg\n+              ves[i,j] <- REMLE$ve\n+              REMLs[i,j] <- REMLE$REML\n+            }\n+            stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg)\n+          }\n+          else {\n+            if ( is.null(Z) ) {\n+              vtids <- vrows & vids\n+              eig.L0 <- emma.eigen.L.wo.Z(K[vtids,vtids,drop=FALSE])\n+              yv <- ys[j,vtids]\n+              nr <- sum(vtids)\n+              REMLE <- emma.REMLE(yv,X[vtids,,drop=FALSE],K[vtids,vtids,drop=FALSE],NULL,ngrids,llim,ulim,esp)\n+              U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE)\n+              Xt <- crossprod(U,X[vtids,,drop=FALSE])\n+              dfs[i,j] <- nr-q1\n+            }\n+            else {\n+              vtids <- as.logical(colSums(Z[vrows,,drop=FALSE])) & vids\n+              vtrows <- vrows & as.logical(rowSums(Z[,vids,drop=FALSE]))\n+              eig.L0 <- emma.eigen.L.w.Z(Z[vtrows,vtids,drop=FALSE],K[vtids,vtids,drop=FALSE])\n+              yv <- ys[j,vtrows]\n+              nr <- sum(vtrows)\n+              REMLE <- emma.REMLE(yv,X[vtrows,,drop=FALSE],K[vtids,vtids,drop=FALSE],Z[vtrows,vtids,drop=FALSE],ngrids,llim,ulim,esp)\n+              U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-sum(vtids))),nr,nr,byrow=TRUE)\n+              Xt <- crossprod(U,X[vtrows,,drop=FALSE])\n+              dfs[i,j] <- nr-q1\n+            }\n+            yt <- crossprod(U,yv)\n+            iXX <- solve(crossprod(Xt,Xt))\n+            beta <- iXX%*%crossprod(Xt,yt)\n+            if ( !ponly ) {\n+              vgs[i,j] <- REMLE$vg\n+              ves[i,j] <- REMLE$ve\n+              REMLs[i,j] <- REMLE$REML\n+            }\n+            stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg)\n+            \n+          }\n+        }\n+        ps[i,] <- 2*pt(abs(stats[i,]),dfs[i,],lower.tail=FALSE)        \n+      }\n+    }    \n+  }\n+  if ( ponly ) {\n+    return (ps)\n+  }\n+  else {\n+    return (list(ps=ps,REMLs=REMLs,stats=stats,dfs=dfs,vgs=vgs,ves=ves))\n+  }\n+}\n'
b
diff -r 000000000000 -r 6b7107812931 mlmm/source_library/emma.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mlmm/source_library/emma.r Thu Jul 02 05:42:38 2015 -0400
[
b'@@ -0,0 +1,1274 @@\n+emma.kinship <- function(snps, method="additive", use="all") {\n+  n0 <- sum(snps==0,na.rm=TRUE)\n+  nh <- sum(snps==0.5,na.rm=TRUE)\n+  n1 <- sum(snps==1,na.rm=TRUE)\n+  nNA <- sum(is.na(snps))\n+\n+  stopifnot(n0+nh+n1+nNA == length(snps))\n+\n+  if ( method == "dominant" ) {\n+    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))\n+    snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & (snps == 0.5)]\n+  }\n+  else if ( method == "recessive" ) {\n+    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))\n+    snps[!is.na(snps) & (snps == 0.5)] <- flags[!is.na(snps) & (snps == 0.5)]\n+  }\n+  else if ( ( method == "additive" ) && ( nh > 0 ) ) {\n+    dsnps <- snps\n+    rsnps <- snps\n+    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) > 0.5),nrow(snps),ncol(snps))\n+    dsnps[!is.na(snps) & (snps==0.5)] <- flags[!is.na(snps) & (snps==0.5)]\n+    flags <- matrix(as.double(rowMeans(snps,na.rm=TRUE) < 0.5),nrow(snps),ncol(snps))\n+    rsnps[!is.na(snps) & (snps==0.5)] <- flags[!is.na(snps) & (snps==0.5)]\n+    snps <- rbind(dsnps,rsnps)\n+  }\n+\n+  if ( use == "all" ) {\n+    mafs <- matrix(rowMeans(snps,na.rm=TRUE),nrow(snps),ncol(snps))\n+    snps[is.na(snps)] <- mafs[is.na(snps)]\n+  }\n+  else if ( use == "complete.obs" ) {\n+    snps <- snps[rowSums(is.na(snps))==0,]\n+  }\n+\n+  n <- ncol(snps)\n+  K <- matrix(nrow=n,ncol=n)\n+  diag(K) <- 1\n+\n+  for(i in 2:n) {\n+    for(j in 1:(i-1)) {\n+      x <- snps[,i]*snps[,j] + (1-snps[,i])*(1-snps[,j])\n+      K[i,j] <- sum(x,na.rm=TRUE)/sum(!is.na(x))\n+      K[j,i] <- K[i,j]\n+    }\n+  }\n+  return(K)\n+}\n+\n+emma.eigen.L <- function(Z,K,complete=TRUE) {\n+  if ( is.null(Z) ) {\n+    return(emma.eigen.L.wo.Z(K))\n+  }\n+  else {\n+    return(emma.eigen.L.w.Z(Z,K,complete))\n+  }\n+}\n+\n+emma.eigen.L.wo.Z <- function(K) {\n+  eig <- eigen(K,symmetric=TRUE)\n+  return(list(values=eig$values,vectors=eig$vectors))\n+}\n+\n+emma.eigen.L.w.Z <- function(Z,K,complete=TRUE) {\n+  if ( complete == FALSE ) {\n+    vids <- colSums(Z)>0\n+    Z <- Z[,vids]\n+    K <- K[vids,vids]\n+  }\n+  eig <- eigen(K%*%crossprod(Z,Z),symmetric=FALSE,EISPACK=TRUE)\n+  return(list(values=eig$values,vectors=qr.Q(qr(Z%*%eig$vectors),complete=TRUE)))\n+}\n+\n+emma.eigen.R <- function(Z,K,X,complete=TRUE) {\n+  if ( ncol(X) == 0 ) {\n+    return(emma.eigen.L(Z,K))\n+  }\n+  else if ( is.null(Z) ) {\n+    return(emma.eigen.R.wo.Z(K,X))\n+  }\n+  else {\n+    return(emma.eigen.R.w.Z(Z,K,X,complete))\n+  }\n+}\n+\n+emma.eigen.R.wo.Z <- function(K, X) {\n+  n <- nrow(X)\n+  q <- ncol(X)\n+  S <- diag(n)-X%*%solve(crossprod(X,X))%*%t(X)\n+  eig <- eigen(S%*%(K+diag(1,n))%*%S,symmetric=TRUE)\n+  stopifnot(!is.complex(eig$values))\n+  return(list(values=eig$values[1:(n-q)]-1,vectors=eig$vectors[,1:(n-q)]))\n+}\n+\n+emma.eigen.R.w.Z <- function(Z, K, X, complete = TRUE) {\n+  if ( complete == FALSE ) {\n+    vids <-  colSums(Z) > 0\n+    Z <- Z[,vids]\n+    K <- K[vids,vids]\n+  }\n+  n <- nrow(Z)\n+  t <- ncol(Z)\n+  q <- ncol(X)\n+  \n+  SZ <- Z - X%*%solve(crossprod(X,X))%*%crossprod(X,Z)\n+  eig <- eigen(K%*%crossprod(Z,SZ),symmetric=FALSE,EISPACK=TRUE)\n+  if ( is.complex(eig$values) ) {\n+    eig$values <- Re(eig$values)\n+    eig$vectors <- Re(eig$vectors)    \n+  }\n+  qr.X <- qr.Q(qr(X))\n+  return(list(values=eig$values[1:(t-q)],\n+              vectors=qr.Q(qr(cbind(SZ%*%eig$vectors[,1:(t-q)],qr.X)),\n+                complete=TRUE)[,c(1:(t-q),(t+1):n)]))   \n+}\n+\n+emma.delta.ML.LL.wo.Z <- function(logdelta, lambda, etas, xi) {\n+  n <- length(xi)\n+  delta <- exp(logdelta)\n+  return( 0.5*(n*(log(n/(2*pi))-1-log(sum((etas*etas)/(lambda+delta))))-sum(log(xi+delta))) )  \n+}\n+\n+emma.delta.ML.LL.w.Z <- function(logdelta, lambda, etas.1, xi.1, n, etas.2.sq ) {\n+  t <- length(xi.1)\n+  delta <- exp(logdelta)\n+#  stopifnot(length(lambda) == length(etas.1))\n+  return( 0.5*(n*(log(n/(2*pi))-1-log(sum(etas.1*etas.1/(lambda+delta))+etas.2.sq/delta))-(sum(log(xi.1+delta))+(n-t)*logdelta)) )\n+}\n+\n+emma.delta.ML.dLL.wo.Z <- functio'..b',drop=FALSE]))\n+          if ( nv == t ) {\n+            eig.R1 = emma.eigen.R.w.Z(Z,K,X)\n+          }          \n+        }\n+\n+        for(j in 1:g) {\n+          vrows <- !is.na(ys[j,])\n+          if ( nv == t ) {\n+            yv <- ys[j,vrows]\n+            nr <- sum(vrows)\n+            if ( is.null(Z) ) {\n+              if ( nr == n ) {\n+                REMLE <- emma.REMLE(yv,X,K,NULL,ngrids,llim,ulim,esp,eig.R1)\n+                U <- eig.L$vectors * matrix(sqrt(1/(eig.L$values+REMLE$delta)),n,n,byrow=TRUE)                \n+              }\n+              else {\n+                eig.L0 <- emma.eigen.L.wo.Z(K[vrows,vrows,drop=FALSE])\n+                REMLE <- emma.REMLE(yv,X[vrows,,drop=FALSE],K[vrows,vrows,drop=FALSE],NULL,ngrids,llim,ulim,esp)\n+                U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE)\n+              }\n+              dfs[i,j] <- nr-q1\n+            }\n+            else {\n+              if ( nr == n ) {\n+                REMLE <- emma.REMLE(yv,X,K,Z,ngrids,llim,ulim,esp,eig.R1)\n+                U <- eig.L$vectors * matrix(c(sqrt(1/(eig.L$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),n-t)),n,n,byrow=TRUE)                \n+              }\n+              else {\n+                vtids <- as.logical(colSums(Z[vrows,,drop=FALSE]))\n+                eig.L0 <- emma.eigen.L.w.Z(Z[vrows,vtids,drop=FALSE],K[vtids,vtids,drop=FALSE])\n+                REMLE <- emma.REMLE(yv,X[vrows,,drop=FALSE],K[vtids,vtids,drop=FALSE],Z[vrows,vtids,drop=FALSE],ngrids,llim,ulim,esp)\n+                U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-sum(vtids))),nr,nr,byrow=TRUE)\n+              }\n+              dfs[i,j] <- nr-q1\n+            }\n+\n+            yt <- crossprod(U,yv)\n+            Xt <- crossprod(U,X[vrows,,drop=FALSE])\n+            iXX <- solve(crossprod(Xt,Xt))\n+            beta <- iXX%*%crossprod(Xt,yt)\n+            if ( !ponly ) {\n+              vgs[i,j] <- REMLE$vg\n+              ves[i,j] <- REMLE$ve\n+              REMLs[i,j] <- REMLE$REML\n+            }\n+            stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg)\n+          }\n+          else {\n+            if ( is.null(Z) ) {\n+              vtids <- vrows & vids\n+              eig.L0 <- emma.eigen.L.wo.Z(K[vtids,vtids,drop=FALSE])\n+              yv <- ys[j,vtids]\n+              nr <- sum(vtids)\n+              REMLE <- emma.REMLE(yv,X[vtids,,drop=FALSE],K[vtids,vtids,drop=FALSE],NULL,ngrids,llim,ulim,esp)\n+              U <- eig.L0$vectors * matrix(sqrt(1/(eig.L0$values+REMLE$delta)),nr,nr,byrow=TRUE)\n+              Xt <- crossprod(U,X[vtids,,drop=FALSE])\n+              dfs[i,j] <- nr-q1\n+            }\n+            else {\n+              vtids <- as.logical(colSums(Z[vrows,,drop=FALSE])) & vids\n+              vtrows <- vrows & as.logical(rowSums(Z[,vids,drop=FALSE]))\n+              eig.L0 <- emma.eigen.L.w.Z(Z[vtrows,vtids,drop=FALSE],K[vtids,vtids,drop=FALSE])\n+              yv <- ys[j,vtrows]\n+              nr <- sum(vtrows)\n+              REMLE <- emma.REMLE(yv,X[vtrows,,drop=FALSE],K[vtids,vtids,drop=FALSE],Z[vtrows,vtids,drop=FALSE],ngrids,llim,ulim,esp)\n+              U <- eig.L0$vectors * matrix(c(sqrt(1/(eig.L0$values+REMLE$delta)),rep(sqrt(1/REMLE$delta),nr-sum(vtids))),nr,nr,byrow=TRUE)\n+              Xt <- crossprod(U,X[vtrows,,drop=FALSE])\n+              dfs[i,j] <- nr-q1\n+            }\n+            yt <- crossprod(U,yv)\n+            iXX <- solve(crossprod(Xt,Xt))\n+            beta <- iXX%*%crossprod(Xt,yt)\n+            if ( !ponly ) {\n+              vgs[i,j] <- REMLE$vg\n+              ves[i,j] <- REMLE$ve\n+              REMLs[i,j] <- REMLE$REML\n+            }\n+            stats[i,j] <- beta[q1]/sqrt(iXX[q1,q1]*REMLE$vg)\n+            \n+          }\n+        }\n+        ps[i,] <- 2*pt(abs(stats[i,]),dfs[i,],lower.tail=FALSE)        \n+      }\n+    }    \n+  }\n+  if ( ponly ) {\n+    return (ps)\n+  }\n+  else {\n+    return (list(ps=ps,REMLs=REMLs,stats=stats,dfs=dfs,vgs=vgs,ves=ves))\n+  }\n+}\n'
b
diff -r 000000000000 -r 6b7107812931 mlmm/source_library/mlmm.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mlmm/source_library/mlmm.r Thu Jul 02 05:42:38 2015 -0400
[
b"@@ -0,0 +1,477 @@\n+##############################################################################################################################################\r\n+###MLMM - Multi-Locus Mixed Model\r\n+###SET OF FUNCTIONS TO CARRY GWAS CORRECTING FOR POPULATION STRUCTURE WHILE INCLUDING COFACTORS THROUGH A STEPWISE-REGRESSION APPROACH\r\n+#######\r\n+#\r\n+##note: require EMMA\r\n+#library(emma)\r\n+#source('emma.r')\r\n+#\r\n+##REQUIRED DATA & FORMAT\r\n+#\r\n+#PHENOTYPE - Y: a vector of length m, with names(Y)=individual names\r\n+#GENOTYPE - X: a n by m matrix, where n=number of individuals, m=number of SNPs, with rownames(X)=individual names, and colnames(X)=SNP names\r\n+#KINSHIP - K: a n by n matrix, with rownames(K)=colnames(K)=individual names\r\n+#each of these data being sorted in the same way, according to the individual name\r\n+#\r\n+##FOR PLOTING THE GWAS RESULTS\r\n+#SNP INFORMATION - snp_info: a data frame having at least 3 columns:\r\n+# - 1 named 'SNP', with SNP names (same as colnames(X)),\r\n+# - 1 named 'Chr', with the chromosome number to which belong each SNP\r\n+# - 1 named 'Pos', with the position of the SNP onto the chromosome it belongs to.\r\n+#######\r\n+#\r\n+##FUNCTIONS USE\r\n+#save this file somewhere on your computer and source it!\r\n+#source('path/mlmm.r')\r\n+#\r\n+###FORWARD + BACKWARD ANALYSES\r\n+#mygwas<-mlmm(Y,X,K,nbchunks,maxsteps)\r\n+#X,Y,K as described above\r\n+#nbchunks: an integer defining the number of chunks of X to run the analysis, allows to decrease the memory usage ==> minimum=2, increase it if you do not have enough memory\r\n+#maxsteps: maximum number of steps desired in the forward approach. The forward approach breaks automatically once the pseudo-heritability is close to 0,\r\n+#\t\t\thowever to avoid doing too many steps in case the pseudo-heritability does not reach a value close to 0, this parameter is also used.\r\n+#\t\t\tIt's value must be specified as an integer >= 3\r\n+#\r\n+###RESULTS\r\n+#\r\n+##STEPWISE TABLE\r\n+#mygwas$step_table\r\n+#\r\n+##PLOTS\r\n+#\r\n+##PLOTS FORM THE FORWARD TABLE\r\n+#plot_step_table(mygwas,type=c('h2','maxpval','BIC','extBIC'))\r\n+#\r\n+##RSS PLOT\r\n+#plot_step_RSS(mygwas)\r\n+#\r\n+##GWAS MANHATTAN PLOTS\r\n+#\r\n+#FORWARD STEPS\r\n+#plot_fwd_GWAS(mygwas,step,snp_info,pval_filt)\r\n+#step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#\r\n+#OPTIMAL MODELS\r\n+#Automatic identification of the optimal models within the forwrad-backward models according to the extendedBIC or multiple-bonferonni criteria\r\n+#\r\n+#plot_opt_GWAS(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#\r\n+##GWAS MANHATTAN PLOT ZOOMED IN A REGION OF INTEREST\r\n+#plot_fwd_region(mygwas,step,snp_info,pval_filt,chrom,pos1,pos2)\r\n+#step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#chrom is an integer specifying the chromosome on which the region of interest is\r\n+#pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info\r\n+#\r\n+#plot_opt_region(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt,chrom,pos1,pos2)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#chrom is an integer specifying the chromosome on which the region of interest is\r\n+#pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info\r\n+#\r\n+##QQPLOTS of pvalues\r\n+#qqplot_fwd_GWAS(mygwas,nsteps)\r\n+#nsteps=maximum number of forward steps to be displayed\r\n+#\r\n+#qqplot_opt_GWAS(mygwas,opt=c('extBIC"..b'ok$Chr),min)$x+aggregate(output_ok$xpos,list(output_ok$Chr),max)$x)/2\r\n+\r\n+\tplot(output_ok$xpos,-log10(output_ok$pval),col=output_ok$col,pch=20,ylab=\'-log10(pval)\',xaxt=\'n\',xlab=\'chromosome\')\r\n+\taxis(1,tick=FALSE,at=d,labels=c(1:max(unique(output_ok$Chr))))\r\n+\tabline(h=x$bonf_thresh,lty=3,col=\'black\')}\r\n+\r\n+plot_region<-function(x,chrom,pos1,pos2){\r\n+\tregion<-subset(x$out,Chr==chrom & Pos>=pos1 & Pos <=pos2)\r\n+\tregion$col<- if (chrom %% 2 == 0) {\'gray60\'} else {\'gray10\'}\r\n+\tregion$col[which(region$SNP %in% x$cof)]<-\'red\'\r\n+\tplot(region$Pos,-log10(region$pval),type=\'p\',pch=20,main=paste(\'chromosome\',chrom,sep=\'\'),xlab=\'position (bp)\',ylab=\'-log10(pval)\',col=region$col,xlim=c(pos1,pos2))\r\n+\tabline(h=x$bonf_thresh,lty=3,col=\'black\')}\r\n+\r\n+\r\n+plot_fwd_GWAS<-function(x,step,snp_info,pval_filt) {\r\n+\tstopifnot(step<=length(x$pval_step))\r\n+\toutput<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by=\'SNP\'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh)\r\n+\tplot_GWAS(output)}\r\n+\r\n+plot_fwd_region<-function(x,step,snp_info,pval_filt,chrom,pos1,pos2) {\r\n+\tstopifnot(step<=length(x$pval_step))\r\n+\toutput<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by=\'SNP\'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh)\r\n+\tplot_region(output,chrom,pos1,pos2)}\r\n+\r\n+\r\n+plot_opt_GWAS<-function(x,opt,snp_info,pval_filt) {\r\n+\tif (opt==\'extBIC\') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_GWAS(output)}\r\n+\telse if (opt==\'mbonf\') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_GWAS(output)}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+plot_opt_region<-function(x,opt,snp_info,pval_filt,chrom,pos1,pos2) {\r\n+\tif (opt==\'extBIC\') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_region(output,chrom,pos1,pos2)}\r\n+\telse if (opt==\'mbonf\') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_region(output,chrom,pos1,pos2)}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+\r\n+qqplot_fwd_GWAS<-function(x,nsteps){\r\n+\tstopifnot(nsteps<=length(x$pval_step))\r\n+\te<--log10(ppoints(nrow(x$pval_step[[1]]$out)))\r\n+\tostep<-list()\r\n+\tostep[[1]]<--log10(sort(x$pval_step[[1]]$out$pval))\r\n+\tfor (i in 2:nsteps) {ostep[[i]]<--log10(sort(x$pval_step[[i]]$out$pval))}\r\n+\r\n+\tmaxp<-ceiling(max(unlist(ostep)))\r\n+\r\n+\tplot(e,ostep[[1]],type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp))\r\n+\tabline(0,1,col="dark grey")\r\n+\r\n+\tfor (i in 2:nsteps) {\r\n+\tpar(new=T)\r\n+\tplot(e,ostep[[i]],type=\'b\',pch=20,cex=0.8,col=i,axes=\'F\',xlab=\'\',ylab=\'\',xlim=c(0,max(e)+1),ylim=c(0,maxp))}\r\n+\tlegend(0,maxp,lty=1,pch=20,col=c(1:length(ostep)),paste(c(0:(length(ostep)-1)),\'cof\',sep=\' \'))\r\n+}\r\n+\r\n+qqplot_opt_GWAS<-function(x,opt){\r\n+\tif (opt==\'extBIC\') {\r\n+\t\te<--log10(ppoints(nrow(x$opt_extBIC$out)))\r\n+\t\to<--log10(sort(x$opt_extBIC$out$pval))\r\n+\t\tmaxp<-ceiling(max(o))\r\n+\t\tplot(e,o,type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste(\'optimal model according to extBIC\'))\r\n+\t\tabline(0,1,col="dark grey")}\r\n+\telse if (opt==\'mbonf\') {\r\n+\t\te<--log10(ppoints(nrow(x$opt_mbonf$out)))\r\n+\t\to<--log10(sort(x$opt_mbonf$out$pval))\r\n+\t\tmaxp<-ceiling(max(o))\r\n+\t\tplot(e,o,type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste(\'optimal model according to mbonf\'))\r\n+\t\tabline(0,1,col="dark grey")}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+\r\n'
b
diff -r 000000000000 -r 6b7107812931 mlmm/source_library/mlmm1.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mlmm/source_library/mlmm1.r Thu Jul 02 05:42:38 2015 -0400
[
b"@@ -0,0 +1,486 @@\n+##############################################################################################################################################\r\n+###MLMM - Multi-Locus Mixed Model\r\n+###SET OF FUNCTIONS TO CARRY GWAS CORRECTING FOR POPULATION STRUCTURE WHILE INCLUDING COFACTORS THROUGH A STEPWISE-REGRESSION APPROACH\r\n+#######\r\n+#\r\n+##note: require EMMA\r\n+#library(emma)\r\n+#source('emma.r')\r\n+#\r\n+##REQUIRED DATA & FORMAT\r\n+#\r\n+#PHENOTYPE - Y: a vector of length m, with names(Y)=individual names\r\n+#GENOTYPE - X: a n by m matrix, where n=number of individuals, m=n umber of SNPs, with rownames(X)=individual names, and colnames(X)=SNP names\r\n+#KINSHIP - K: a n by n matrix, with rownames(K)=colnames(K)=individual names\r\n+#each of these data being sorted in the same way, according to the individual name\r\n+#\r\n+##FOR PLOTING THE GWAS RESULTS\r\n+#SNP INFORMATION - snp_info: a data frame having at least 3 columns:\r\n+# - 1 named 'SNP', with SNP names (same as colnames(X)),\r\n+# - 1 named 'Chr', with the chromosome number to which belong each SNP\r\n+# - 1 named 'Pos', with the position of the SNP onto the chromosome it belongs to.\r\n+#######\r\n+#\r\n+##FUNCTIONS USE\r\n+#save this file somewhere on your computer and source it!\r\n+#source('path/mlmm.r')\r\n+#\r\n+###FORWARD + BACKWARD ANALYSES\r\n+#mygwas<-mlmm(Y,X,K,nbchunks,maxsteps)\r\n+#X,Y,K as described above\r\n+#nbchunks: an integer defining the number of chunks of to run the analysis, allows to decrease the memory usage ==> minimum=2, increase it if you do not have enough memory\r\n+#maxsteps: maximum number of steps desired in the forward approach. The forward approach breaks automatically once the pseudo-heritability is close to 0,\r\n+#\t\t\thowever to avoid doing too many steps in case the pseudo-heritability does not reach a value close to 0, this parameter is also used.\r\n+#\t\t\tIt's value must be specified as an integer >= 3\r\n+#\r\n+###RESULTS\r\n+#\r\n+##STEPWISE TABLE\r\n+#mygwas$step_table\r\n+#\r\n+##PLOTS\r\n+#\r\n+##PLOTS FORM THE FORWARD TABLE\r\n+#plot_step_table(mygwas,type=c('h2','maxpval','BIC','extBIC'))\r\n+#\r\n+##RSS PLOT\r\n+#plot_step_RSS(mygwas)\r\n+#\r\n+##GWAS MANHATTAN PLOTS\r\n+#\r\n+#FORWARD STEPS\r\n+#plot_fwd_GWAS(mygwas,step,snp_info,pval_filt)\r\n+#step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#\r\n+#OPTIMAL MODELS\r\n+#Automatic identification of the optimal models within the forwrad-backward models according to the extendedBIC or multiple-bonferonni criteria\r\n+#\r\n+#plot_opt_GWAS(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#\r\n+##GWAS MANHATTAN PLOT ZOOMED IN A REGION OF INTEREST\r\n+#plot_fwd_region(mygwas,step,snp_info,pval_filt,chrom,pos1,pos2)\r\n+#step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#chrom is an integer specifying the chromosome on which the region of interest is\r\n+#pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info\r\n+#\r\n+#plot_opt_region(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt,chrom,pos1,pos2)\r\n+#snp_info as described above\r\n+#pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot\r\n+#chrom is an integer specifying the chromosome on which the region of interest is\r\n+#pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info\r\n+#\r\n+##QQPLOTS of pvalues\r\n+#qqplot_fwd_GWAS(mygwas,nsteps)\r\n+#nsteps=maximum number of forward steps to be displayed\r\n+#\r\n+#qqplot_opt_GWAS(mygwas,opt=c('extBIC'"..b'output_ok$pval) > x$bonf_thresh]), output_ok$SNP[-log10(output_ok$pval) > x$bonf_thresh], pos=3, cex=0.7) \r\n+      legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" "))\r\n+      } else {\r\n+      legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" "))\r\n+      }\r\n+}\r\n+  \r\n+plot_region<-function(x,chrom,pos1,pos2){\r\n+\tregion<-subset(x$out,Chr==chrom & Pos>=pos1 & Pos <=pos2)\r\n+\tregion$col<- if (chrom %% 2 == 0) {\'gray60\'} else {\'gray10\'}\r\n+\tregion$col[which(region$SNP %in% x$cof)]<-\'red\'\r\n+\tplot(region$Pos,-log10(region$pval),type=\'p\',pch=20,main=paste(\'chromosome\',chrom,sep=\'\'),xlab=\'position (bp)\',ylab=\'-log10(pval)\',col=region$col,xlim=c(pos1,pos2))\r\n+\tabline(h=x$bonf_thresh,lty=3,col=\'black\')}\r\n+\r\n+\r\n+plot_fwd_GWAS<-function(x,step,snp_info,pval_filt) {\r\n+\tstopifnot(step<=length(x$pval_step))\r\n+\toutput<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by=\'SNP\'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh)\r\n+\tplot_GWAS(output)}\r\n+\r\n+plot_fwd_region<-function(x,step,snp_info,pval_filt,chrom,pos1,pos2) {\r\n+\tstopifnot(step<=length(x$pval_step))\r\n+\toutput<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by=\'SNP\'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh)\r\n+\tplot_region(output,chrom,pos1,pos2)}\r\n+\r\n+\r\n+plot_opt_GWAS<-function(x,opt,snp_info,pval_filt) {\r\n+\tif (opt==\'extBIC\') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_GWAS(output)}\r\n+\telse if (opt==\'mbonf\') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_GWAS(output)}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+plot_opt_region<-function(x,opt,snp_info,pval_filt,chrom,pos1,pos2) {\r\n+\tif (opt==\'extBIC\') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_region(output,chrom,pos1,pos2)}\r\n+\telse if (opt==\'mbonf\') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by=\'SNP\'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh)\r\n+\t\tplot_region(output,chrom,pos1,pos2)}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+\r\n+qqplot_fwd_GWAS<-function(x,nsteps){\r\n+\tstopifnot(nsteps<=length(x$pval_step))\r\n+\te<--log10(ppoints(nrow(x$pval_step[[1]]$out)))\r\n+\tostep<-list()\r\n+\tostep[[1]]<--log10(sort(x$pval_step[[1]]$out$pval))\r\n+\tfor (i in 2:nsteps) {ostep[[i]]<--log10(sort(x$pval_step[[i]]$out$pval))}\r\n+\r\n+\tmaxp<-ceiling(max(unlist(ostep)))\r\n+\r\n+\tplot(e,ostep[[1]],type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp))\r\n+\tabline(0,1,col="dark grey")\r\n+\r\n+\tfor (i in 2:nsteps) {\r\n+\tpar(new=T)\r\n+\tplot(e,ostep[[i]],type=\'b\',pch=20,cex=0.8,col=i,axes=\'F\',xlab=\'\',ylab=\'\',xlim=c(0,max(e)+1),ylim=c(0,maxp))}\r\n+\tlegend(0,maxp,lty=1,pch=20,col=c(1:length(ostep)),paste(c(0:(length(ostep)-1)),\'cof\',sep=\' \'))\r\n+}\r\n+\r\n+qqplot_opt_GWAS<-function(x,opt){\r\n+\tif (opt==\'extBIC\') {\r\n+\t\te<--log10(ppoints(nrow(x$opt_extBIC$out)))\r\n+\t\to<--log10(sort(x$opt_extBIC$out$pval))\r\n+\t\tmaxp<-ceiling(max(o))\r\n+\t\tplot(e,o,type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste(\'optimal model according to extBIC\'))\r\n+\t\tabline(0,1,col="dark grey")}\r\n+\telse if (opt==\'mbonf\') {\r\n+\t\te<--log10(ppoints(nrow(x$opt_mbonf$out)))\r\n+\t\to<--log10(sort(x$opt_mbonf$out$pval))\r\n+\t\tmaxp<-ceiling(max(o))\r\n+\t\tplot(e,o,type=\'b\',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste(\'optimal model according to mbonf\'))\r\n+\t\tabline(0,1,col="dark grey")}\r\n+\telse {cat(\'error! \\n opt must be extBIC or mbonf\')}}\r\n+\r\n+\r\n'
b
diff -r 000000000000 -r 6b7107812931 mlmm/source_library/plot_MLMM.Ago.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mlmm/source_library/plot_MLMM.Ago.r Thu Jul 02 05:42:38 2015 -0400
[
@@ -0,0 +1,36 @@
+## fonction ###########################################################################
+plot_MLMM.Ago<-function(x) {
+ output1 <-x$out[order(x$out$Pos),]
+ output_ok<-output1[order(output1$Chr),]
+
+ maxpos<-c(0,cumsum(as.numeric(aggregate(output_ok$Pos,list(output_ok$Chr),max)$x+max(cumsum(as.numeric(aggregate(output_ok$Pos,list(output_ok$Chr),max)$x)))/200)))
+ plot_col<-rep(c('gray10','gray60'),ceiling(max(unique(output_ok$Chr))/2))
+# plot_col<-c('blue','darkgreen','red','cyan','purple')
+ size<-aggregate(output_ok$Pos,list(output_ok$Chr),length)$x
+ a<-rep(maxpos[1],size[1])
+ b<-rep(plot_col[1],size[1])
+ for (i in 2:max(unique(output_ok$Chr))){
+ a<-c(a,rep(maxpos[i],size[i]))
+ b<-c(b,rep(plot_col[i],size[i]))}
+
+ output_ok$xpos<-output_ok$Pos+a
+ output_ok$col<-b
+ output_ok$col[output_ok$mk=='qtl']<-'cyan'
+ output_ok$col[output_ok$SNP %in% x$cof]<-'red'
+
+ d<-(aggregate(output_ok$xpos,list(output_ok$Chr),min)$x+aggregate(output_ok$xpos,list(output_ok$Chr),max)$x)/2
+
+ plot(output_ok$xpos,-log10(output_ok$pval),col=output_ok$col,pch=20,ylab='-log10(pval)',xaxt='n',xlab='chromosome')
+ axis(1,tick=FALSE,at=d,labels=c(1:max(unique(output_ok$Chr))))
+  xline(output_ok$xpos[output_ok$mk=='qtl'], col='cyan', lwd=0.1)
+ abline(h=x$bonf_thresh,lty=3,col='black')
+
+
+    if (length(output_ok$pval[-log10(output_ok$pval) > x$bonf_thresh]) > 0) {
+      text(output_ok$xpos[-log10(output_ok$pval) > x$bonf_thresh], -log10(output_ok$pval[-log10(output_ok$pval) > x$bonf_thresh]), output_ok$SNP[-log10(output_ok$pval) > x$bonf_thresh], pos=3, cex=0.7)
+      legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" "))
+      } else {
+      legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" "))
+      }
+
+}