annotate vcftools_main/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl @ 0:3b1436a9a6e5 draft

Uploaded
author gandres
date Thu, 02 Jul 2015 05:21:40 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
1
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
2 #!/usr/bin/perl
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
3
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
4 use strict;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
5 use Getopt::Long;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
6
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
7 my $usage = qq~Usage:$0 <args> [<opts>]
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
8
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
9 where <args> are:
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
10
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
11 -i, --input <VCF input>
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
12 -o, --out <output basename>
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
13 -s, --step <step size>
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
14 -w, --window <window size>
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
15
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
16 <opts> are:
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
17 -g, --group <group file>
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
18 ~;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
19 $usage .= "\n";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
20
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
21 my ($input,$out);
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
22 my $window = 200000;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
23 my $step = 200000;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
24 my $groupfile;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
25 GetOptions(
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
26 "input=s" => \$input,
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
27 "out=s" => \$out,
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
28 "step=s" => \$step,
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
29 "window=s" => \$window,
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
30 "group=s" => \$groupfile
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
31 );
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
32
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
33
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
34
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
35
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
36 die $usage
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
37 if ( !$input);
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
38
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
39 my %hash;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
40 my $cmd_part = "";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
41 if ($groupfile && -e $groupfile)
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
42 {
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
43 open(my $G,$groupfile) or die "Cannot open $groupfile: $!";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
44 while(<$G>)
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
45 {
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
46 my $line = $_;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
47 chomp($line);
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
48 $line=~s/\r//g;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
49 $line=~s/\n//g;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
50 my @infos = split(/;/,$line);
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
51 if ($infos[0] && $infos[1])
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
52 {
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
53 $hash{$infos[1]} .= " --indv " . $infos[0];
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
54 $cmd_part .= " --indv " . $infos[0];
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
55 }
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
56 }
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
57 close($G);
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
58 }
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
59
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
60
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
61 if ($step =~/^(\d+)\s*$/){
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
62 $step = $1;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
63 }
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
64 else{
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
65 die "Error: step size must be an integer\n";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
66 }
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
67 if ($window =~/^(\d+)\s*$/){
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
68 $window = $1;
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
69 }
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
70 else{
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
71 die "Error: window size must be an integer\n";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
72 }
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
73
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
74
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
75 my $VCFTOOLS_EXE = "vcftools";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
76
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
77 system("vcf-sort $input >$input.sorted");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
78
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
79
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
80
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
81 system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --window-pi $window --window-pi-step $step >>$out.vcftools.log 2>&1");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
82 system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TajimaD $window >>$out.vcftools.log 2>&1");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
83 system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TsTv $window >>$out.vcftools.log 2>&1");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
84 system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --SNPdensity $window >>$out.vcftools.log 2>&1");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
85
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
86 if (keys(%hash) > 0)
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
87 {
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
88 my $files_pi = "";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
89 my $files_dtajima = "";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
90 foreach my $pop(sort(keys(%hash)))
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
91 {
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
92 my $cmd_part = $hash{$pop};
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
93 system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --window-pi $window --window-pi-step $step $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
94 my $sed_cmd = "sed -i \"s\/PI\/$pop\/g\" $out.$pop.windowed.pi";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
95 system($sed_cmd);
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
96 $files_pi .= "$out.$pop.windowed.pi ";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
97
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
98 system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --SNPdensity $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
99
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
100 system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TajimaD $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
101 my $sed_cmd = "sed -i \"s\/TajimaD\/$pop\/g\" $out.$pop.Tajima.D";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
102 system($sed_cmd);
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
103 $sed_cmd = "sed -i \"s/nan/0/g\" $out.Tajima.D";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
104 system($sed_cmd);
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
105 $files_dtajima .= "$out.$pop.Tajima.D ";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
106
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
107 system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TsTv $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
108 }
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
109 system("paste $files_pi >>$out.combined.pi");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
110 my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$5\"\t\"\$10'} $out.combined.pi >$out.combined.pi.txt";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
111 system($awk_cmd);
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
112
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
113 system("paste $files_dtajima >>$out.combined.dtajima");
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
114 my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$4\"\t\"\$8'} $out.combined.dtajima >$out.combined.dtajima.txt";
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
115 system($awk_cmd);
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
116 }
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
117
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
118
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
119
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
120
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
121
3b1436a9a6e5 Uploaded
gandres
parents:
diff changeset
122