diff vcftools_main/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl @ 0:3b1436a9a6e5 draft

Uploaded
author gandres
date Thu, 02 Jul 2015 05:21:40 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcftools_main/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl	Thu Jul 02 05:21:40 2015 -0400
@@ -0,0 +1,122 @@
+
+#!/usr/bin/perl
+
+use strict;
+use Getopt::Long;
+
+my $usage = qq~Usage:$0 <args> [<opts>]
+
+where <args> are:
+
+    -i, --input          <VCF input>
+    -o, --out            <output basename>
+    -s, --step           <step size>
+    -w, --window         <window size>
+
+<opts> are:
+    -g, --group          <group file>
+~;
+$usage .= "\n";
+
+my ($input,$out);
+my $window = 200000;
+my $step = 200000;
+my $groupfile;
+GetOptions(
+	"input=s"        => \$input,
+	"out=s"          => \$out,
+	"step=s"         => \$step,
+	"window=s"       => \$window,
+	"group=s"        => \$groupfile
+);
+
+
+
+
+die $usage
+  if ( !$input);
+
+my %hash;
+my $cmd_part = "";
+if ($groupfile && -e $groupfile)
+{
+	open(my $G,$groupfile) or die "Cannot open $groupfile: $!";
+	while(<$G>)
+	{
+		my $line = $_;
+		chomp($line);
+		$line=~s/\r//g;
+		$line=~s/\n//g;
+		my @infos = split(/;/,$line);
+		if ($infos[0] && $infos[1])
+		{
+			$hash{$infos[1]} .= " --indv " . $infos[0];
+			$cmd_part .= " --indv " . $infos[0];
+		}
+	}
+	close($G);
+}
+
+
+if ($step =~/^(\d+)\s*$/){
+	$step = $1;
+}
+else{
+	die "Error: step size must be an integer\n";
+}
+if ($window =~/^(\d+)\s*$/){
+	$window = $1;
+}
+else{
+	die "Error: window size must be an integer\n";
+}
+
+
+my $VCFTOOLS_EXE = "vcftools";
+
+system("vcf-sort $input >$input.sorted");
+
+
+	
+system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --window-pi $window --window-pi-step $step >>$out.vcftools.log 2>&1");
+system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TajimaD $window >>$out.vcftools.log 2>&1");
+system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TsTv $window >>$out.vcftools.log 2>&1");
+system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --SNPdensity $window >>$out.vcftools.log 2>&1");
+
+if (keys(%hash) > 0)
+{
+	my $files_pi = "";
+	my $files_dtajima = "";
+	foreach my $pop(sort(keys(%hash)))
+	{
+        	my $cmd_part = $hash{$pop};
+		system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --window-pi $window --window-pi-step $step $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
+		my $sed_cmd = "sed -i \"s\/PI\/$pop\/g\" $out.$pop.windowed.pi";
+		system($sed_cmd);
+		$files_pi .= "$out.$pop.windowed.pi ";
+
+		system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --SNPdensity $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
+
+		system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TajimaD $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
+		my $sed_cmd = "sed -i \"s\/TajimaD\/$pop\/g\" $out.$pop.Tajima.D";
+                system($sed_cmd);
+		$sed_cmd = "sed -i \"s/nan/0/g\" $out.Tajima.D";
+                system($sed_cmd);
+                $files_dtajima .= "$out.$pop.Tajima.D ";
+		
+		system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TsTv $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
+	}
+	system("paste $files_pi >>$out.combined.pi");
+	my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$5\"\t\"\$10'} $out.combined.pi >$out.combined.pi.txt";
+	system($awk_cmd);
+
+	system("paste $files_dtajima >>$out.combined.dtajima");
+        my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$4\"\t\"\$8'} $out.combined.dtajima >$out.combined.dtajima.txt";
+        system($awk_cmd);
+}
+
+
+
+
+	
+