changeset 6:626b714f72bb

add gd_ploteig
author Richard Burhans <burhans@bx.psu.edu>
date Tue, 10 Apr 2012 13:51:19 -0400
parents 8a1147101f85
children e29f4d801bb0
files README genome_diversity/Makefile genome_diversity/bin/gd_ploteig
diffstat 3 files changed, 173 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/README	Tue Apr 10 12:49:47 2012 -0400
+++ b/README	Tue Apr 10 13:51:19 2012 -0400
@@ -7,7 +7,7 @@
     networkx   (we used version 1.6)   http://pypi.python.org/packages/source/n/networkx/
 
 And the following software:
-    ADMIXTURE  (we used version 1.22 ) http://www.genetics.ucla.edu/software/admixture/
+    ADMIXTURE  (we used version 1.22)  http://www.genetics.ucla.edu/software/admixture/
     EIGENSOFT  (we used version 3.0)   http://genepath.med.harvard.edu/~reich/Software.htm
     PHAST      (we used version 1.2.1) http://compgen.bscb.cornell.edu/phast/
     QuickTree  (we used version 1.1)   http://www.sanger.ac.uk/resources/software/quicktree/
--- a/genome_diversity/Makefile	Tue Apr 10 12:49:47 2012 -0400
+++ b/genome_diversity/Makefile	Tue Apr 10 13:51:19 2012 -0400
@@ -3,7 +3,6 @@
 
 clean:
 	cd src && make clean
-	rm -rf bin
 
 install:
 	cd src && make install
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/bin/gd_ploteig	Tue Apr 10 13:51:19 2012 -0400
@@ -0,0 +1,172 @@
+#!/usr/bin/env perl
+
+### ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k]  [-y] [-z sep]
+use Getopt::Std ;
+use File::Basename ;
+use warnings ;
+
+## pops : separated  -x = make postscript and pdf  -z use another separator
+##  -k keep intermediate files
+## NEW if pops is a file names are read one per line
+
+getopts('i:o:p:c:s:d:z:t:xky',\%opts) ;
+$postscmode = $opts{"x"} ;
+$oldkeystyle =  $opts{"y"} ;
+$kflag = $opts{"k"} ;
+$keepflag = 1 if ($kflag) ;
+$keepflag = 1 unless ($postscmode) ;
+
+$zsep = ":" ;
+if (defined $opts{"z"}) {
+ $zsep = $opts{"z"} ;
+ $zsep = "\+" if ($zsep eq "+") ;
+}
+
+$title = "" ;
+if (defined $opts{"t"}) {
+ $title = $opts{"t"} ;
+}
+if (defined $opts{"i"}) {
+ $infile = $opts{"i"} ;
+}
+else {
+ usage() ;
+ exit 0 ;
+}
+open (FF, $infile) || die "can't open $infile\n" ;
+@L = (<FF>) ;
+chomp @L ;
+$nf = 0 ;
+foreach $line (@L) { 
+ next if ($line =~ /^\s+#/) ;
+ @Z = split " ", $line ;
+ $x = @Z ;
+ $nf = $x if ($nf < $x) ;
+}
+printf "## number of fields: %d\n", $nf ;
+$popcol = $nf-1 ;
+
+
+if (defined $opts{"p"}) {
+ $pops = $opts{"p"} ;
+}
+else {
+ die "p parameter compulsory\n" ;
+}
+
+$popsname = setpops ($pops) ;
+print "$popsname\n" ;
+
+$c1 = 1; $c2 =2 ;
+if (defined $opts{"c"}) {
+ $cols = $opts{"c"} ;
+ ($c1, $c2) = split ":", $cols ;
+ die "bad c param: $cols\n" unless (defined $cols) ;
+}
+
+$stem = "$infile.$c1:$c2" ;
+if (defined $opts{"s"}) {
+ $stem = $opts{"s"} ;
+}
+$gnfile = "$stem.$popsname.xtxt" ;
+ 
+if (defined $opts{"o"}) {
+ $gnfile = $opts{"o"} ;
+}
+
+@T = () ; ## trash 
+open (GG, ">$gnfile") || die "can't open $gnfile\n" ;
+print GG "## " unless ($postscmode) ;
+print GG "set terminal postscript color\n" ;
+print GG "set style line  2 lc rgbcolor \"#376600\"\n";
+print GG "set style line 11 lc rgbcolor \"#376600\"\n";
+print GG "set style line 20 lc rgbcolor \"#376600\"\n";
+print GG "set style line 29 lc rgbcolor \"#376600\"\n";
+print GG "set style line  6 lc rgbcolor \"#FFCC00\"\n";
+print GG "set style line 15 lc rgbcolor \"#FFCC00\"\n";
+print GG "set style line 24 lc rgbcolor \"#FFCC00\"\n";
+print GG "set style increment user\n";
+print GG "set title  \"$title\" \n" ; 
+print GG "set key outside\n" unless ($oldkeystyle) ; 
+print GG "set xlabel  \"eigenvector $c1\" \n" ; 
+print GG "set ylabel  \"eigenvector $c2\" \n" ; 
+print GG "plot " ;
+$np = @P ;
+$lastpop = $P[$np-1] ;
+$d1 = $c1+1 ;
+$d2 = $c2+1 ;
+foreach $pop (@P)  { 
+ $dfile = "$stem:$pop" ;
+ push @T, $dfile ;
+ print GG " \"$dfile\" using $d1:$d2 title \"$pop\" " ;
+ print GG ", \\\n" unless ($pop eq $lastpop) ;
+ open (YY, ">$dfile") || die "can't open $dfile\n" ;
+ foreach $line (@L) {
+  next if ($line =~ /^\s+#/) ;
+  @Z = split " ", $line ;
+  next unless (defined $Z[$popcol]) ;
+  next unless ($Z[$popcol] eq $pop) ;
+  print YY "$line\n" ;
+ }
+ close YY ;
+}
+print GG "\n" ;
+print GG "## "  if ($postscmode) ;
+print GG "pause 9999\n"  ;
+close GG ;
+
+if ($postscmode) { 
+$psfile = "$stem.ps" ;
+
+ if ($gnfile =~ /xtxt/) { 
+  $psfile = $gnfile ;
+  $psfile  =~ s/xtxt/ps/ ;
+ }
+system "gnuplot < $gnfile > $psfile" ;
+#system "fixgreen  $psfile" ;
+system "ps2pdf  $psfile " ;
+}
+unlink (@T) unless $keepflag ;
+
+sub usage { 
+ 
+print "ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k]\n" ;  
+print "-i eigfile     input file first col indiv-id last col population\n" ;
+print "## as output by smartpca in outputvecs \n" ;
+print "-c a:b         a, b columns to plot.  1:2 would be common and leading 2 eigenvectors\n" ;
+print "-p pops        Populations to plot.  : delimited.   eg  -p Bantu:San:French\n" ;
+print "## pops can also be a filename.  List populations 1 per line\n" ;
+print "[-s stem]      stem will start various output files\n"  ;
+print "[-o ofile]     ofile will be gnuplot control file.  Should have xtxt suffix\n"; 
+print "[-x]           make ps and pdf files\n" ; 
+print "[-k]           keep various intermediate files although  -x set\n" ;
+print "## necessary if .xtxt file is to be hand edited\n" ;
+print "[-y]           put key at top right inside box (old mode)\n" ;
+print "[-t]           title (legend)\n" ;
+
+print "The xtxt file is a gnuplot file and can be easily hand edited.  Intermediate files
+needed if you want to make your own plot\n" ;
+
+}
+sub setpops {      
+ my ($pops) = @_  ; 
+ local (@a, $d, $b, $e) ; 
+
+ if (-e $pops) {  
+  open (FF1, $pops) || die "can't open $pops\n" ;
+  @P = () ;
+  foreach $line (<FF1>) { 
+  ($a) = split " ", $line ;
+  next unless (defined $a) ;
+  next if ($a =~ /\#/) ;
+  push  @P, $a ;
+  }
+  $out = join ":", @P ; 
+  print "## pops: $out\n" ;
+  ($b, $d , $e) = fileparse($pops) ;
+  return $b ;
+ }
+ @P = split $zsep, $pops ;
+ return $pops ;
+
+}