0
|
1
|
|
2 #!/usr/bin/perl
|
|
3
|
|
4 use strict;
|
|
5 use Getopt::Long;
|
|
6
|
|
7 my $usage = qq~Usage:$0 <args> [<opts>]
|
|
8
|
|
9 where <args> are:
|
|
10
|
|
11 -i, --input <VCF input>
|
|
12 -o, --out <output basename>
|
|
13 -s, --step <step size>
|
|
14 -w, --window <window size>
|
|
15
|
|
16 <opts> are:
|
|
17 -g, --group <group file>
|
|
18 ~;
|
|
19 $usage .= "\n";
|
|
20
|
|
21 my ($input,$out);
|
|
22 my $window = 200000;
|
|
23 my $step = 200000;
|
|
24 my $groupfile;
|
|
25 GetOptions(
|
|
26 "input=s" => \$input,
|
|
27 "out=s" => \$out,
|
|
28 "step=s" => \$step,
|
|
29 "window=s" => \$window,
|
|
30 "group=s" => \$groupfile
|
|
31 );
|
|
32
|
|
33
|
|
34
|
|
35
|
|
36 die $usage
|
|
37 if ( !$input);
|
|
38
|
|
39 my %hash;
|
|
40 my $cmd_part = "";
|
|
41 if ($groupfile && -e $groupfile)
|
|
42 {
|
|
43 open(my $G,$groupfile) or die "Cannot open $groupfile: $!";
|
|
44 while(<$G>)
|
|
45 {
|
|
46 my $line = $_;
|
|
47 chomp($line);
|
|
48 $line=~s/\r//g;
|
|
49 $line=~s/\n//g;
|
|
50 my @infos = split(/;/,$line);
|
|
51 if ($infos[0] && $infos[1])
|
|
52 {
|
|
53 $hash{$infos[1]} .= " --indv " . $infos[0];
|
|
54 $cmd_part .= " --indv " . $infos[0];
|
|
55 }
|
|
56 }
|
|
57 close($G);
|
|
58 }
|
|
59
|
|
60
|
|
61 if ($step =~/^(\d+)\s*$/){
|
|
62 $step = $1;
|
|
63 }
|
|
64 else{
|
|
65 die "Error: step size must be an integer\n";
|
|
66 }
|
|
67 if ($window =~/^(\d+)\s*$/){
|
|
68 $window = $1;
|
|
69 }
|
|
70 else{
|
|
71 die "Error: window size must be an integer\n";
|
|
72 }
|
|
73
|
|
74
|
|
75 my $VCFTOOLS_EXE = "vcftools";
|
|
76
|
|
77 system("vcf-sort $input >$input.sorted");
|
|
78
|
|
79
|
|
80
|
|
81 system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --window-pi $window --window-pi-step $step >>$out.vcftools.log 2>&1");
|
|
82 system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TajimaD $window >>$out.vcftools.log 2>&1");
|
|
83 system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TsTv $window >>$out.vcftools.log 2>&1");
|
|
84 system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --SNPdensity $window >>$out.vcftools.log 2>&1");
|
|
85
|
|
86 if (keys(%hash) > 0)
|
|
87 {
|
|
88 my $files_pi = "";
|
|
89 my $files_dtajima = "";
|
|
90 foreach my $pop(sort(keys(%hash)))
|
|
91 {
|
|
92 my $cmd_part = $hash{$pop};
|
|
93 system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --window-pi $window --window-pi-step $step $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
|
|
94 my $sed_cmd = "sed -i \"s\/PI\/$pop\/g\" $out.$pop.windowed.pi";
|
|
95 system($sed_cmd);
|
|
96 $files_pi .= "$out.$pop.windowed.pi ";
|
|
97
|
|
98 system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --SNPdensity $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
|
|
99
|
|
100 system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TajimaD $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
|
|
101 my $sed_cmd = "sed -i \"s\/TajimaD\/$pop\/g\" $out.$pop.Tajima.D";
|
|
102 system($sed_cmd);
|
|
103 $sed_cmd = "sed -i \"s/nan/0/g\" $out.Tajima.D";
|
|
104 system($sed_cmd);
|
|
105 $files_dtajima .= "$out.$pop.Tajima.D ";
|
|
106
|
|
107 system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TsTv $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
|
|
108 }
|
|
109 system("paste $files_pi >>$out.combined.pi");
|
|
110 my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$5\"\t\"\$10'} $out.combined.pi >$out.combined.pi.txt";
|
|
111 system($awk_cmd);
|
|
112
|
|
113 system("paste $files_dtajima >>$out.combined.dtajima");
|
|
114 my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$4\"\t\"\$8'} $out.combined.dtajima >$out.combined.dtajima.txt";
|
|
115 system($awk_cmd);
|
|
116 }
|
|
117
|
|
118
|
|
119
|
|
120
|
|
121
|
|
122
|