annotate hapmap2mlmm/HapmapToMLMMFiles.pl @ 10:c6640c49fd01 draft

planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
author dereeper
date Mon, 16 Apr 2018 09:00:24 -0400
parents 98c37a5d67f4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
2
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
3 use strict;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
4
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
5 use Getopt::Long;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
6
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
7 my $usage = qq~Usage:$0 <args> [<opts>]
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
8 where <args> are:
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
9 -h, --hapmap <Hapmap input file>
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
10 -m, --map <Map output file>
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
11 -g, --geno <Genotype output file>
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
12 -p, --path <Path for transpose executable>
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
13 ~;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
14 $usage .= "\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
15
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
16 my ($hapmap,$map,$geno,$path);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
17
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
18
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
19 GetOptions(
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
20 "geno=s" => \$geno,
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
21 "map=s" => \$map,
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
22 "hapmap=s" => \$hapmap,
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
23 "path=s" => \$path,
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
24 );
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
25
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
26
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
27 die $usage
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
28 if ( !$geno || !$map || !$hapmap || !$path);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
29
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
30 my $TRANSPOSE_EXE = "$path/transpose.awk";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
31
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
32 my @snps;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
33 my %chrom_pos;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
34 my $num_line = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
35 open(my $O,">geno_transposed");
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
36 open(my $H,$hapmap);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
37 while(<$H>)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
38 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
39 $num_line++;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
40 my $line = $_;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
41 chomp($line);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
42 $line =~s/\r//g;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
43 $line =~s/\n//g;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
44 my @infos = split(/\t/,$line);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
45 if ($num_line == 1)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
46 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
47 print $O "Ind_id";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
48 for (my $i = 11; $i <= $#infos; $i++)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
49 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
50 my $individual = $infos[$i];
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
51 print $O " " . $individual;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
52 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
53 print $O "\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
54 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
55 elsif ($num_line > 1)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
56 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
57 my $snp = $infos[0];
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
58 my $variation = $infos[1];
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
59 my %scores;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
60 if ($variation =~/(\w)\/(\w)/)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
61 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
62 my $allele1 = $1;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
63 my $allele2 = $2;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
64 $scores{$allele1} = 0;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
65 $scores{$allele2} = 1;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
66 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
67 my $chrom = $infos[2];
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
68 my $pos = $infos[3];
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
69 $chrom_pos{$snp}{"chrom"} = $chrom;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
70 $chrom_pos{$snp}{"pos"} = $pos;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
71 push(@snps,$snp);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
72 print $O "$snp";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
73 for (my $i = 11; $i <= $#infos; $i++)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
74 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
75 my $genotype = $infos[$i];
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
76 my @alleles = split("",$genotype);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
77 if ($genotype ne "NN")
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
78 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
79 my $score = $scores{$alleles[0]} + $scores{$alleles[1]};
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
80 print $O " $score";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
81 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
82 else
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
83 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
84 print $O " NA";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
85 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
86 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
87 print $O "\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
88 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
89 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
90 close($H);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
91 close($O);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
92
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
93 open(my $M,">$map");
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
94 print $M "SNP Chr Pos\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
95 foreach my $snp(@snps)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
96 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
97 print $M "$snp " . $chrom_pos{$snp}{"chrom"} . " ". $chrom_pos{$snp}{"pos"} . "\n";
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
98 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
99 close($M);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
100
10
c6640c49fd01 planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
dereeper
parents: 9
diff changeset
101 system("gawk -E $TRANSPOSE_EXE geno_transposed >geno_transposed2");
9
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
102
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
103 open(my $F,">$geno");
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
104 open(my $G,"geno_transposed2");
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
105 while(<$G>)
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
106 {
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
107 my $line = $_;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
108 $line =~s/ /\t/g;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
109 print $F $line;
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
110 }
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
111 close($G);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
112 close($F);
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
113
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
114 unlink("geno_transposed");
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
115 unlink("geno_transposed2");
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
116
98c37a5d67f4 Uploaded
dereeper
parents:
diff changeset
117