Mercurial > repos > dereeper > sniplay3
comparison admixture/.svn/text-base/Admixture.pl.svn-base @ 20:13cff72ec2d3 draft
Uploaded
| author | dereeper |
|---|---|
| date | Mon, 23 Mar 2015 05:30:36 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 19:a0a95688cf17 | 20:13cff72ec2d3 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 | |
| 3 use strict; | |
| 4 use Switch; | |
| 5 use Getopt::Long; | |
| 6 use Bio::SeqIO; | |
| 7 | |
| 8 my $usage = qq~Usage:$0 <args> [<opts>] | |
| 9 where <args> are: | |
| 10 -i, --input <input HAPMAP> | |
| 11 -o, --output <output> | |
| 12 -k, --kmin <K min. int> | |
| 13 -m, --maxK <K max. int> | |
| 14 -d, --directory <temporary directory> | |
| 15 -p, --path <path to executables> | |
| 16 ~; | |
| 17 $usage .= "\n"; | |
| 18 | |
| 19 my ($input,$output,$kmin,$kmax,$directory,$path); | |
| 20 | |
| 21 | |
| 22 GetOptions( | |
| 23 "input=s" => \$input, | |
| 24 "output=s" => \$output, | |
| 25 "kmin=s" => \$kmin, | |
| 26 "maxK=s" => \$kmax, | |
| 27 "directory=s" => \$directory, | |
| 28 "path=s" => \$path | |
| 29 ); | |
| 30 | |
| 31 | |
| 32 die $usage | |
| 33 if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path); | |
| 34 | |
| 35 if ($kmin =~/^(\d+)\s*$/){ | |
| 36 $kmin = $1; | |
| 37 } | |
| 38 else{ | |
| 39 die "Error: kmin must be an integer\n"; | |
| 40 } | |
| 41 if ($kmax =~/^(\d+)\s*$/){ | |
| 42 $kmax = $1; | |
| 43 } | |
| 44 else{ | |
| 45 die "Error: kmax must be an integer\n"; | |
| 46 } | |
| 47 | |
| 48 | |
| 49 ###################### | |
| 50 # create map file | |
| 51 ###################### | |
| 52 open(my $M,">$directory/input.map"); | |
| 53 open(my $H,$input); | |
| 54 <$H>; | |
| 55 while(<$H>) | |
| 56 { | |
| 57 my @infos = split(/\t/,$_); | |
| 58 print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n"; | |
| 59 } | |
| 60 close($H); | |
| 61 close($M); | |
| 62 | |
| 63 ###################### | |
| 64 # create ped file | |
| 65 ###################### | |
| 66 system("$path/transpose.awk $input >$directory/input.ped.2"); | |
| 67 | |
| 68 open(my $P,">$directory/input.ped"); | |
| 69 open(my $P2,"$directory/input.ped.2"); | |
| 70 my $n = 0; | |
| 71 my $ind_num = 0; | |
| 72 my @individus; | |
| 73 while(<$P2>) | |
| 74 { | |
| 75 $n++; | |
| 76 if ($n > 11) | |
| 77 { | |
| 78 my $line = $_; | |
| 79 $line =~s/N/0/g; | |
| 80 if (/^([^\s]+)\s+(.*)$/) | |
| 81 { | |
| 82 $ind_num++; | |
| 83 my $ind = $1; | |
| 84 push(@individus,$ind); | |
| 85 my $genoyping_line = $2; | |
| 86 print $P "$ind $ind_num 0 0 1 2"; | |
| 87 my @genotypes = split(/\s/,$genoyping_line); | |
| 88 foreach my $genotype(@genotypes) | |
| 89 { | |
| 90 $genotype =~s/N/0/g; | |
| 91 my @alleles = split("",$genotype); | |
| 92 print $P " " . join(" ",@alleles); | |
| 93 } | |
| 94 | |
| 95 print $P "\n"; | |
| 96 } | |
| 97 } | |
| 98 } | |
| 99 close($P2); | |
| 100 close($P); | |
| 101 | |
| 102 unlink("$directory/input.ped.2"); | |
| 103 | |
| 104 system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1"); | |
| 105 | |
| 106 | |
| 107 ################################### | |
| 108 # launch admixture for different K | |
| 109 ################################### | |
| 110 my %errors; | |
| 111 for (my $k = $kmin; $k <= $kmax; $k++) | |
| 112 { | |
| 113 system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1"); | |
| 114 my $cv_error_line = `grep -h CV $directory/log.$k`; | |
| 115 if ($cv_error_line =~/: (\d+\.*\d*)$/) | |
| 116 { | |
| 117 $errors{$1} = $k; | |
| 118 } | |
| 119 system("cat $directory/log.$k >>$directory/logs"); | |
| 120 system("echo '\n\n====================================\n\n' >>$directory/logs"); | |
| 121 system("cat out.$k.Q >>$directory/outputs.Q"); | |
| 122 system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); | |
| 123 system("cat out.$k.P >>$directory/outputs.P"); | |
| 124 system("echo '\n\n====================================\n\n' >>$directory/outputs.P"); | |
| 125 } | |
| 126 | |
| 127 my @sorted_errors = sort {$a<=>$b} keys(%errors); | |
| 128 my $best_K = $errors{@sorted_errors[0]}; | |
| 129 | |
| 130 | |
| 131 #system("cp -rf out.$best_K.Q $directory/output"); | |
| 132 | |
| 133 open(BEST1,"out.$best_K.Q"); | |
| 134 open(BEST2,">$directory/output"); | |
| 135 print BEST2 "<Covariate>\n"; | |
| 136 print BEST2 "<Trait>"; | |
| 137 for (my $j=1;$j<=$best_K;$j++) | |
| 138 { | |
| 139 print BEST2 " Q" . $j; | |
| 140 } | |
| 141 print BEST2 "\n"; | |
| 142 my $i = 0; | |
| 143 while(<BEST1>) | |
| 144 { | |
| 145 my $line = $_; | |
| 146 $line =~s/ /\t/g; | |
| 147 my $ind = $individus[$i]; | |
| 148 print BEST2 "$ind "; | |
| 149 print BEST2 $line; | |
| 150 $i++; | |
| 151 } | |
| 152 close(BEST1); | |
| 153 close(BEST2); | |
| 154 | |
| 155 system("cp -rf $directory/log.$best_K $directory/log"); | |
| 156 | |
| 157 | |
| 158 | |
| 159 |
