Mercurial > repos > gandres > vcftools_filter_stats_diversity
view vcftools_main/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl @ 0:3b1436a9a6e5 draft
Uploaded
author | gandres |
---|---|
date | Thu, 02 Jul 2015 05:21:40 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl use strict; use Getopt::Long; my $usage = qq~Usage:$0 <args> [<opts>] where <args> are: -i, --input <VCF input> -o, --out <output basename> -s, --step <step size> -w, --window <window size> <opts> are: -g, --group <group file> ~; $usage .= "\n"; my ($input,$out); my $window = 200000; my $step = 200000; my $groupfile; GetOptions( "input=s" => \$input, "out=s" => \$out, "step=s" => \$step, "window=s" => \$window, "group=s" => \$groupfile ); die $usage if ( !$input); my %hash; my $cmd_part = ""; if ($groupfile && -e $groupfile) { open(my $G,$groupfile) or die "Cannot open $groupfile: $!"; while(<$G>) { my $line = $_; chomp($line); $line=~s/\r//g; $line=~s/\n//g; my @infos = split(/;/,$line); if ($infos[0] && $infos[1]) { $hash{$infos[1]} .= " --indv " . $infos[0]; $cmd_part .= " --indv " . $infos[0]; } } close($G); } if ($step =~/^(\d+)\s*$/){ $step = $1; } else{ die "Error: step size must be an integer\n"; } if ($window =~/^(\d+)\s*$/){ $window = $1; } else{ die "Error: window size must be an integer\n"; } my $VCFTOOLS_EXE = "vcftools"; system("vcf-sort $input >$input.sorted"); system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --window-pi $window --window-pi-step $step >>$out.vcftools.log 2>&1"); system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TajimaD $window >>$out.vcftools.log 2>&1"); system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TsTv $window >>$out.vcftools.log 2>&1"); system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --SNPdensity $window >>$out.vcftools.log 2>&1"); if (keys(%hash) > 0) { my $files_pi = ""; my $files_dtajima = ""; foreach my $pop(sort(keys(%hash))) { my $cmd_part = $hash{$pop}; system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --window-pi $window --window-pi-step $step $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); my $sed_cmd = "sed -i \"s\/PI\/$pop\/g\" $out.$pop.windowed.pi"; system($sed_cmd); $files_pi .= "$out.$pop.windowed.pi "; system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --SNPdensity $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TajimaD $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); my $sed_cmd = "sed -i \"s\/TajimaD\/$pop\/g\" $out.$pop.Tajima.D"; system($sed_cmd); $sed_cmd = "sed -i \"s/nan/0/g\" $out.Tajima.D"; system($sed_cmd); $files_dtajima .= "$out.$pop.Tajima.D "; system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TsTv $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); } system("paste $files_pi >>$out.combined.pi"); my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$5\"\t\"\$10'} $out.combined.pi >$out.combined.pi.txt"; system($awk_cmd); system("paste $files_dtajima >>$out.combined.dtajima"); my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$4\"\t\"\$8'} $out.combined.dtajima >$out.combined.dtajima.txt"; system($awk_cmd); }