view vcftools_main/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl @ 0:3b1436a9a6e5 draft

Uploaded
author gandres
date Thu, 02 Jul 2015 05:21:40 -0400
parents
children
line wrap: on
line source


#!/usr/bin/perl

use strict;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]

where <args> are:

    -i, --input          <VCF input>
    -o, --out            <output basename>
    -s, --step           <step size>
    -w, --window         <window size>

<opts> are:
    -g, --group          <group file>
~;
$usage .= "\n";

my ($input,$out);
my $window = 200000;
my $step = 200000;
my $groupfile;
GetOptions(
	"input=s"        => \$input,
	"out=s"          => \$out,
	"step=s"         => \$step,
	"window=s"       => \$window,
	"group=s"        => \$groupfile
);




die $usage
  if ( !$input);

my %hash;
my $cmd_part = "";
if ($groupfile && -e $groupfile)
{
	open(my $G,$groupfile) or die "Cannot open $groupfile: $!";
	while(<$G>)
	{
		my $line = $_;
		chomp($line);
		$line=~s/\r//g;
		$line=~s/\n//g;
		my @infos = split(/;/,$line);
		if ($infos[0] && $infos[1])
		{
			$hash{$infos[1]} .= " --indv " . $infos[0];
			$cmd_part .= " --indv " . $infos[0];
		}
	}
	close($G);
}


if ($step =~/^(\d+)\s*$/){
	$step = $1;
}
else{
	die "Error: step size must be an integer\n";
}
if ($window =~/^(\d+)\s*$/){
	$window = $1;
}
else{
	die "Error: window size must be an integer\n";
}


my $VCFTOOLS_EXE = "vcftools";

system("vcf-sort $input >$input.sorted");


	
system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --window-pi $window --window-pi-step $step >>$out.vcftools.log 2>&1");
system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TajimaD $window >>$out.vcftools.log 2>&1");
system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TsTv $window >>$out.vcftools.log 2>&1");
system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --SNPdensity $window >>$out.vcftools.log 2>&1");

if (keys(%hash) > 0)
{
	my $files_pi = "";
	my $files_dtajima = "";
	foreach my $pop(sort(keys(%hash)))
	{
        	my $cmd_part = $hash{$pop};
		system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --window-pi $window --window-pi-step $step $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
		my $sed_cmd = "sed -i \"s\/PI\/$pop\/g\" $out.$pop.windowed.pi";
		system($sed_cmd);
		$files_pi .= "$out.$pop.windowed.pi ";

		system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --SNPdensity $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");

		system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TajimaD $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
		my $sed_cmd = "sed -i \"s\/TajimaD\/$pop\/g\" $out.$pop.Tajima.D";
                system($sed_cmd);
		$sed_cmd = "sed -i \"s/nan/0/g\" $out.Tajima.D";
                system($sed_cmd);
                $files_dtajima .= "$out.$pop.Tajima.D ";
		
		system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TsTv $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
	}
	system("paste $files_pi >>$out.combined.pi");
	my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$5\"\t\"\$10'} $out.combined.pi >$out.combined.pi.txt";
	system($awk_cmd);

	system("paste $files_dtajima >>$out.combined.dtajima");
        my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$4\"\t\"\$8'} $out.combined.dtajima >$out.combined.dtajima.txt";
        system($awk_cmd);
}