annotate admixture/Admixture.pl @ 12:a03f54c420f1 draft

Uploaded
author dereeper
date Fri, 20 Feb 2015 11:16:59 -0500
parents fb274c4ae95a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
2
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
3 use strict;
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
4 use Switch;
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
5 use Getopt::Long;
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
6 use Bio::SeqIO;
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
7
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
# Usage text printed whenever the command line is incomplete or invalid.
my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
-i, --input <input HAPMAP>
-o, --output <output>
-k, --kmin <K min. int>
-m, --maxK <K max. int>
-d, --directory <temporary directory>
-p, --path <path to executables>
~;
$usage .= "\n";

# Command-line settings; every one of them is mandatory (see the check below).
# NOTE(review): $output is required here but is not referenced anywhere else
# in the visible script -- the result is written to "$directory/output".
my ($input,$output,$kmin,$kmax,$directory,$path);

# GetOptions returns false when it meets an unknown option or a missing
# value; stop with the usage text instead of running on a half-parsed
# command line.
GetOptions(
    "input=s"     => \$input,
    "output=s"    => \$output,
    "kmin=s"      => \$kmin,
    "maxK=s"      => \$kmax,
    "directory=s" => \$directory,
    "path=s"      => \$path
) or die $usage;

# A missing, empty or "0" value for any option is treated as "not given".
die $usage
    if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path);

# Both K bounds must be unsigned integers; trailing whitespace is tolerated
# and stripped by re-assigning from the capture.
foreach my $bound ( [ 'kmin', \$kmin ], [ 'kmax', \$kmax ] )
{
    my ($label, $value_ref) = @{$bound};
    if (${$value_ref} =~ /^(\d+)\s*$/)
    {
        ${$value_ref} = $1;
    }
    else
    {
        die "Error: $label must be an integer\n";
    }
}

# An inverted range would make the K loop run zero times and leave the
# script without any best K to report.
die "Error: kmin ($kmin) must not be greater than kmax ($kmax)\n"
    if ($kmin > $kmax);
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
47
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
48
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
######################
# create map file
######################
# Writes "$directory/input.map" with one tab-separated line per data row of
# the input file: input column 3, input column 1, a constant "0", and input
# column 4.
# NOTE(review): presumably chromosome / marker id / genetic distance /
# position as expected by PLINK, given a HapMap-style input -- confirm.
open(my $M, '>', "$directory/input.map")
    or die "Error: cannot write $directory/input.map: $!\n";
open(my $H, '<', $input)
    or die "Error: cannot read $input: $!\n";

# The first line of the input is a header and carries no marker.
<$H>;

while (my $row = <$H>)
{
    # Strip the line ending so it can never leak into the last column used.
    $row =~ s/\r?\n\z//;

    # Blank lines would otherwise yield a record made of empty fields.
    next unless $row =~ /\S/;

    my @infos = split(/\t/, $row);
    print $M join("\t", $infos[2], $infos[0], "0", $infos[3]) . "\n";
}
close($H);

# Buffered write errors only surface when the handle is closed.
close($M)
    or die "Error: cannot finish writing $directory/input.map: $!\n";
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
62
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
######################
# create ped file
######################
# Runs "$path/transpose.awk" on the input and turns its output into
# "$directory/input.ped", then asks plink to build the binary fileset
# "$directory/out.*" from input.ped / input.map.
#
# The awk output is read straight from a pipe (list form, so no shell parses
# $path or $input) instead of going through a temporary input.ped.2 file
# that was deleted right after use anyway.
open(my $P, '>', "$directory/input.ped")
    or die "Error: cannot write $directory/input.ped: $!\n";
open(my $P2, '-|', "$path/transpose.awk", $input)
    or die "Error: cannot run $path/transpose.awk: $!\n";

my $n = 0;          # number of transposed lines read so far
my $ind_num = 0;    # running number written as the second ped column
my @individus;      # individual names, in the order they are written

while (my $row = <$P2>)
{
    $n++;

    # The first 11 transposed lines are skipped.
    # NOTE(review): presumably the 11 HapMap annotation columns -- confirm.
    next if ($n <= 11);

    # First token is the individual name, the rest are its genotypes.
    next unless ($row =~ /^(\S+)\s+(.*)$/);
    my ($ind, $genotyping_line) = ($1, $2);

    $ind_num++;
    push(@individus, $ind);

    # Six leading ped columns, then every genotype split into its alleles.
    my $record = "$ind $ind_num 0 0 1 2";

    # split ' ' collapses runs of whitespace, so no empty genotype fields.
    foreach my $genotype (split(' ', $genotyping_line))
    {
        # Missing calls are coded N in the input and 0 in the ped file.
        # Only genotypes are recoded, never the individual name.
        $genotype =~ s/N/0/g;
        $record .= " " . join(" ", split("", $genotype));
    }

    print $P $record . "\n";
}

# close() on a pipe also reports a non-zero exit status of the command.
close($P2)
    or die( $!
        ? "Error: reading from $path/transpose.awk failed: $!\n"
        : "Error: $path/transpose.awk exited with status " . ($? >> 8) . "\n" );
close($P)
    or die "Error: cannot finish writing $directory/input.ped: $!\n";

die "Error: no individual could be read from $input\n"
    unless (@individus);

# Nothing downstream can work without the binary fileset, so a plink
# failure is fatal.
system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1") == 0
    or die "Error: plink failed (exit status " . ($? >> 8) . "), see $directory/plink.log\n";
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
105
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
106
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
###################################
# launch admixture for different K
###################################

# Appends the content of $source to $target, followed by a separator block
# (two newlines, a line of "=", three newlines).
# A missing or unreadable $source only triggers a warning; the separator is
# still written so the combined file keeps one section per K.
sub _append_with_separator
{
    my ($source, $target) = @_;

    my $out;
    unless (open($out, '>>', $target))
    {
        warn "Warning: cannot append to $target: $!\n";
        return;
    }

    if (open(my $in, '<', $source))
    {
        while (my $chunk = <$in>)
        {
            print {$out} $chunk;
        }
        close($in);
    }
    else
    {
        warn "Warning: cannot read $source: $!\n";
    }

    print {$out} "\n\n====================================\n\n\n";
    close($out)
        or warn "Warning: cannot finish writing $target: $!\n";
    return;
}

# Maps a cross-validation error to the K that produced it.
my %errors;
for (my $k = $kmin; $k <= $kmax; $k++)
{
    my $log_file = "$directory/log.$k";

    # admixture writes out.$k.Q and out.$k.P into the current directory.
    # A failing run is reported but does not stop the remaining K values.
    system("admixture --cv $directory/out.bed $k >>$log_file 2>&1") == 0
        or warn "Warning: admixture returned a non-zero status for K=$k, see $log_file\n";

    # The log is opened in append mode above, so it may hold several runs:
    # keep the last line mentioning CV.
    my $cv_error_line;
    if (open(my $log, '<', $log_file))
    {
        while (my $entry = <$log>)
        {
            $cv_error_line = $entry if ($entry =~ /CV/);
        }
        close($log);
    }
    else
    {
        warn "Warning: cannot read $log_file: $!\n";
    }

    if (defined $cv_error_line && $cv_error_line =~ /: (\d+\.*\d*)$/)
    {
        # On identical errors keep the first, i.e. the smallest, K instead
        # of silently letting the last one win.
        $errors{$1} = $k unless (exists $errors{$1});
    }

    _append_with_separator($log_file,  "$directory/logs");
    _append_with_separator("out.$k.Q", "$directory/outputs.Q");
    _append_with_separator("out.$k.P", "$directory/outputs.P");
}

# Without a single parsed error there is no best K to pick.
die "Error: no cross-validation error could be read from the admixture logs in $directory\n"
    unless (%errors);

# The best K is the one with the numerically smallest error.
my @sorted_errors = sort {$a<=>$b} keys(%errors);
my $best_K = $errors{$sorted_errors[0]};
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
129
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
130
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
#system("cp -rf out.$best_K.Q $directory/output");

# Writes "$directory/output": the Q file of the best K, prefixed with a
# "<Covariate>" line and a "<Trait> Q1 .. Qn" header, each row labelled with
# the individual found at the same position in @individus.
# Finally copies the log of the best K to "$directory/log".
die "Error: no best K could be determined, nothing to write\n"
    unless (defined $best_K);

open(my $best_q, '<', "out.$best_K.Q")
    or die "Error: cannot read out.$best_K.Q: $!\n";
open(my $covariates, '>', "$directory/output")
    or die "Error: cannot write $directory/output: $!\n";

print {$covariates} "<Covariate>\n";
print {$covariates} "<Trait>" . join("", map { " Q" . $_ } 1 .. $best_K) . "\n";

my $i = 0;
while (my $line = <$best_q>)
{
    # Q values are separated by spaces in the admixture output.
    $line =~ s/ /\t/g;

    # Rows and individuals are matched by position only.
    my $ind = defined $individus[$i] ? $individus[$i] : "";
    print {$covariates} "$ind " . $line;
    $i++;
}
close($best_q);
close($covariates)
    or die "Error: cannot finish writing $directory/output: $!\n";

# A different count means some rows carry the wrong or an empty label.
warn "Warning: out.$best_K.Q has $i rows but " . scalar(@individus) . " individuals were read\n"
    if ($i != scalar(@individus));

# List form: the paths are handed to cp as they are, without a shell.
system("cp", "-rf", "$directory/log.$best_K", "$directory/log") == 0
    or warn "Warning: cannot copy $directory/log.$best_K to $directory/log\n";
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
156
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
157
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
158
fb274c4ae95a Uploaded
dereeper
parents:
diff changeset
159