annotate VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl @ 3:151a9aec4607 draft

Deleted selected files
author dereeper
date Sun, 04 Feb 2024 14:19:20 +0000
parents 7130bd0d2a83
children 3ae4c598ac08
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
1
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
2 #!/usr/bin/perl
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
3
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
4 use strict;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
5 use Getopt::Long;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
6 use Bio::SeqIO;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
7
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
8 my $usage = qq~Usage:$0 <args> [<opts>]
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
9
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
10 where <args> are:
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
11
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
12 -i, --input <VCF input>
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
13 -o, --out <output basename>
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
14 -w, --window <window size>
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
15
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
16 <opts> are:
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
17 -g, --group <group file>
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
18 ~;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
19 $usage .= "\n";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
20
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
21 my ($input,$out);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
22 my $window = 200000;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
23 my $groupfile;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
24 GetOptions(
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
25 "input=s" => \$input,
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
26 "out=s" => \$out,
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
27 "window=s" => \$window,
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
28 "group=s" => \$groupfile
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
29 );
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
30
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
31
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
32
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
33 die $usage
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
34 if ( !$input);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
35
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
36 my %hash;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
37 my $cmd_part = "";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
38 if ($groupfile && -e $groupfile)
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
39 {
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
40 open(my $G,$groupfile) or die "Cannot open $groupfile: $!";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
41 while(<$G>)
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
42 {
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
43 my $line = $_;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
44 chomp($line);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
45 $line=~s/\r//g;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
46 $line=~s/\n//g;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
47 my @infos = split(/;/,$line);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
48 if ($infos[0] && $infos[1])
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
49 {
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
50 $hash{$infos[1]} .= " --indv " . $infos[0];
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
51 $cmd_part .= " --indv " . $infos[0];
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
52 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
53 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
54 close($G);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
55 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
56
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
57
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
58 if ($window =~/^(\d+)\s*$/){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
59 $window = $1;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
60 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
61 else{
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
62 die "Error: window size must be an integer\n";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
63 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
64
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
65
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
66 my $input_format = "--vcf $input.sorted";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
67 if ($input =~/\.vcf/){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
68 system("vcf-sort $input >$input.sorted");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
69 $input_format = "--vcf $input.sorted";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
70 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
71 elsif ($input =~/\.bcf/){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
72 $input_format = "--bcf $input";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
73 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
74 system("vcftools $input_format --out $out --window-pi $window --window-pi-step $window >>$out.vcftools.log 2>&1");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
75 system("vcftools $input_format --out $out --TajimaD $window >>$out.vcftools.log 2>&1");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
76 system("vcftools $input_format --out $out --TsTv $window >>$out.vcftools.log 2>&1");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
77 system("vcftools $input_format --out $out --SNPdensity $window >>$out.vcftools.log 2>&1");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
78
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
79
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
80
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
81 ##################################################
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
82 # Tajima distribution
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
83 ##################################################
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
84 my %counts3;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
85 my @values;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
86 open(my $TAJIMA,"$out.Tajima.D");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
87 <$TAJIMA>;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
88 my $max = 0;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
89 while(<$TAJIMA>){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
90 my $line = $_;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
91 $line =~s/\n//g;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
92 $line =~s/\r//g;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
93 my @inf = split(/\t/,$line);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
94 my $val = $inf[3];
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
95 my $arrondi = sprintf("%.1f",$val);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
96 $counts3{$arrondi}++;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
97 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
98 close($TAJIMA);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
99
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
100
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
101 my %counts3_pop;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
102
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
103 if (keys(%hash) > 0)
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
104 {
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
105 my $files_pi = "";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
106 my $files_dtajima = "";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
107 my $cmd_fst = "--fst-window-size $window";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
108 my $cmd_fst_by_marker = "";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
109 foreach my $pop(sort(keys(%hash)))
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
110 {
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
111 my $cmd_part = $hash{$pop};
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
112
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
113
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
114 open(my $POP,">$input.$pop.txt");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
115 my @ind_of_pop = split(" --indv ",$cmd_part);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
116 print $POP join("\n",@ind_of_pop);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
117 close($POP);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
118 $cmd_fst .= " --weir-fst-pop $input.$pop.txt";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
119 $cmd_fst_by_marker .= " --weir-fst-pop $input.$pop.txt";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
120
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
121 my $cmd_part = $hash{$pop};
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
122 system("vcftools $input_format --remove-filtered-all --out $out.$pop --window-pi $window --window-pi-step $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
123 my $sed_cmd = "sed -i \"s\/PI\/$pop\/g\" $out.$pop.windowed.pi";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
124 system($sed_cmd);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
125 $files_pi .= "$out.$pop.windowed.pi ";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
126
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
127 system("vcftools $input_format --remove-filtered-all --out $out.$pop --SNPdensity $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
128
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
129 system("vcftools $input_format --remove-filtered-all --out $out.$pop --TajimaD $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
130 my $sed_cmd = "sed -i \"s\/TajimaD\/$pop\/g\" $out.$pop.Tajima.D";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
131 system($sed_cmd);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
132 $sed_cmd = "sed -i \"s/nan/0/g\" $out.Tajima.D";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
133 system($sed_cmd);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
134 $files_dtajima .= "$out.$pop.Tajima.D ";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
135
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
136
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
137 open(my $TAJIMAPOP,"$out.$pop.Tajima.D");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
138 <$TAJIMAPOP>;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
139 my $max = 0;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
140 while(<$TAJIMAPOP>){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
141 my $line = $_;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
142 $line =~s/\n//g;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
143 $line =~s/\r//g;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
144 my @inf = split(/\t/,$line);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
145 my $val = $inf[3];
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
146 my $arrondi = sprintf("%.1f",$val);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
147 $counts3_pop{$pop}{$arrondi}++;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
148 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
149 close($TAJIMAPOP);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
150
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
151 system("vcftools $input_format --remove-filtered-all --out $out.$pop --TsTv $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
152 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
153 system("paste $files_pi >$out.combined.pi");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
154 my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$5\"\t\"\$10'} $out.combined.pi >$out.combined.pi.txt";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
155 system($awk_cmd);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
156
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
157 system("paste $files_dtajima >>$out.combined.dtajima");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
158 $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$4\"\t\"\$8'} $out.combined.dtajima >$out.combined.dtajima.txt";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
159 system($awk_cmd);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
160
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
161 system("vcftools $input_format --remove-filtered-all --out $out.fst $cmd_fst --maf 0.001 >>$out.vcftools.log 2>&1");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
162 $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$5'} $out.fst.windowed.weir.fst >$out.fst.txt";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
163 system($awk_cmd);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
164
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
165 system("vcftools $input_format --remove-filtered-all --out $out.fst $cmd_fst_by_marker --maf 0.001 >>$out.vcftools.log 2>&1");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
166
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
167 my %genes;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
168 open(my $V,$input);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
169 my $go = 0;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
170 while(<$V>){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
171 if ($go){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
172 my @infos = split(/\t/,$_);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
173 my $chr = $infos[0];
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
174 my $pos = $infos[1];
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
175 my $annot = $infos[7];
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
176 my $gene = "#";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
177 if ($annot =~/EFF=\w+\((.*)\)/){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
178 my @parts = split(/\|/,$1);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
179 $gene = $parts[4];
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
180 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
181 $genes{"$chr:$pos"} = $gene;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
182 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
183 if (/^#CHROM/){$go = 1;}
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
184 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
185
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
186 my $cumul = 0;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
187 my $previous_pos;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
188 my %chrs;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
189
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
190 # get only the first 2000 points
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
191 `sort -k 3 $out.fst.weir.fst | grep -v 'nan' | grep -v 'e-' | tail -10001 | sort -n -k 2 >$out.fst.weir.highest.fst`;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
192
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
193 my %hash;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
194 open(my $F,"$out.fst.weir.highest.fst");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
195 <$F>;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
196 open(my $O,">$out.fst.by_marker.txt");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
197 open(my $O2,">$out.fst.by_marker.genes.txt");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
198 print $O "Chromosome Marker Position Fst\n";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
199 print $O2 "Chromosome Marker Position Fst\n";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
200 while(<$F>){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
201 my $line = $_;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
202 chomp($line);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
203 my ($chr,$pos,$val) = split(/\t/,$line);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
204 $hash{$chr}{$pos} = $val;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
205 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
206 close($F);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
207 foreach my $chr(sort keys(%hash)){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
208 my $ref_hash = $hash{$chr};
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
209 my %hash2 = %$ref_hash;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
210 foreach my $pos(sort {$a<=>$b} keys(%hash2)){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
211 my $val = $hash{$chr}{$pos};
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
212 if (!$chrs{$chr}){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
213 $cumul = $previous_pos;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
214 $chrs{$chr} = 1;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
215 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
216 my $x = $pos+$cumul;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
217 #print $O2 "$chr $chr:$pos $pos $val " . $genes{"$chr:$pos"} . "\n";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
218 print $O2 "$chr $chr:$pos $pos $val\n";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
219 print $O "$chr $chr:$pos $x $val\n";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
220 $previous_pos = $x;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
221 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
222
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
223 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
224
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
225 close($O);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
226 close($O2);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
227
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
228 #$awk_cmd = "awk {'if (\$3>0.6 && \$3<=1 && \$3!~\"nan\"){print \$1\"\t\"\$2\"\t\"\$2\"\t\"\$3}'} $out.fst.weir.fst >$out.fst.by_marker.txt";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
229 #system($awk_cmd);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
230 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
231 open(my $M3,">$out.tajima_distrib");
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
232 print $M3 "Values nb of values";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
233 if (keys(%hash) > 0){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
234 print $M3 " ".join("\t",keys(%hash));
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
235 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
236 print $M3 "\n";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
237 for (my $i = -2.5; $i <= 2.5; $i=$i+0.1)
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
238 {
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
239 my $arrondi = sprintf("%.1f",$i);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
240 my $nb = 0;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
241 if ($counts3{$arrondi})
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
242 {
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
243 $nb = $counts3{$arrondi};
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
244 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
245 print $M3 "Tajima $arrondi $nb";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
246 if (keys(%hash) > 0){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
247 foreach my $pop(sort(keys(%hash))){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
248 my $nb_pop = 0;
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
249 if ($counts3_pop{$pop}{$arrondi}){
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
250 $nb_pop = $counts3_pop{$pop}{$arrondi};
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
251 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
252 print $M3 " $nb_pop";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
253 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
254 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
255 print $M3 "\n";
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
256 }
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
257 close($M3);
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
258
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
259
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
260
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
261
7130bd0d2a83 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents:
diff changeset
262