Mercurial > repos > gandres > vcftools_filter_stats_diversity
diff vcftools_main/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl @ 0:3b1436a9a6e5 draft
Uploaded
author | gandres |
---|---|
date | Thu, 02 Jul 2015 05:21:40 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vcftools_main/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl Thu Jul 02 05:21:40 2015 -0400 @@ -0,0 +1,122 @@ + +#!/usr/bin/perl + +use strict; +use Getopt::Long; + +my $usage = qq~Usage:$0 <args> [<opts>] + +where <args> are: + + -i, --input <VCF input> + -o, --out <output basename> + -s, --step <step size> + -w, --window <window size> + +<opts> are: + -g, --group <group file> +~; +$usage .= "\n"; + +my ($input,$out); +my $window = 200000; +my $step = 200000; +my $groupfile; +GetOptions( + "input=s" => \$input, + "out=s" => \$out, + "step=s" => \$step, + "window=s" => \$window, + "group=s" => \$groupfile +); + + + + +die $usage + if ( !$input); + +my %hash; +my $cmd_part = ""; +if ($groupfile && -e $groupfile) +{ + open(my $G,$groupfile) or die "Cannot open $groupfile: $!"; + while(<$G>) + { + my $line = $_; + chomp($line); + $line=~s/\r//g; + $line=~s/\n//g; + my @infos = split(/;/,$line); + if ($infos[0] && $infos[1]) + { + $hash{$infos[1]} .= " --indv " . $infos[0]; + $cmd_part .= " --indv " . $infos[0]; + } + } + close($G); +} + + +if ($step =~/^(\d+)\s*$/){ + $step = $1; +} +else{ + die "Error: step size must be an integer\n"; +} +if ($window =~/^(\d+)\s*$/){ + $window = $1; +} +else{ + die "Error: window size must be an integer\n"; +} + + +my $VCFTOOLS_EXE = "vcftools"; + +system("vcf-sort $input >$input.sorted"); + + + +system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --window-pi $window --window-pi-step $step >>$out.vcftools.log 2>&1"); +system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TajimaD $window >>$out.vcftools.log 2>&1"); +system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TsTv $window >>$out.vcftools.log 2>&1"); +system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --SNPdensity $window >>$out.vcftools.log 2>&1"); + +if (keys(%hash) > 0) +{ + my $files_pi = ""; + my $files_dtajima = ""; + foreach my $pop(sort(keys(%hash))) + { + my $cmd_part = $hash{$pop}; + system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --window-pi $window --window-pi-step $step $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); + my $sed_cmd = "sed -i \"s\/PI\/$pop\/g\" $out.$pop.windowed.pi"; + system($sed_cmd); + $files_pi .= "$out.$pop.windowed.pi "; + + system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --SNPdensity $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); + + system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TajimaD $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); + my $sed_cmd = "sed -i \"s\/TajimaD\/$pop\/g\" $out.$pop.Tajima.D"; + system($sed_cmd); + $sed_cmd = "sed -i \"s/nan/0/g\" $out.Tajima.D"; + system($sed_cmd); + $files_dtajima .= "$out.$pop.Tajima.D "; + + system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TsTv $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); + } + system("paste $files_pi >>$out.combined.pi"); + my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$5\"\t\"\$10'} $out.combined.pi >$out.combined.pi.txt"; + system($awk_cmd); + + system("paste $files_dtajima >>$out.combined.dtajima"); + my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$4\"\t\"\$8'} $out.combined.dtajima >$out.combined.dtajima.txt"; + system($awk_cmd); +} + + + + + +