Mercurial > repos > dereeper > sniplay
view hapmap2mlmm/HapmapToMLMMFiles.pl @ 11:15b23cdde685 draft
planemo upload commit 305985afd3b7c3d47f531149c2f1a279af2d12aa-dirty
author | dereeper |
---|---|
date | Fri, 20 Apr 2018 09:04:25 -0400 |
parents | c6640c49fd01 |
children |
line wrap: on
line source
#!/usr/bin/perl use strict; use Getopt::Long; my $usage = qq~Usage:$0 <args> [<opts>] where <args> are: -h, --hapmap <Hapmap input file> -m, --map <Map output file> -g, --geno <Genotype output file> -p, --path <Path for transpose executable> ~; $usage .= "\n"; my ($hapmap,$map,$geno,$path); GetOptions( "geno=s" => \$geno, "map=s" => \$map, "hapmap=s" => \$hapmap, "path=s" => \$path, ); die $usage if ( !$geno || !$map || !$hapmap || !$path); my $TRANSPOSE_EXE = "$path/transpose.awk"; my @snps; my %chrom_pos; my $num_line = 0; open(my $O,">geno_transposed"); open(my $H,$hapmap); while(<$H>) { $num_line++; my $line = $_; chomp($line); $line =~s/\r//g; $line =~s/\n//g; my @infos = split(/\t/,$line); if ($num_line == 1) { print $O "Ind_id"; for (my $i = 11; $i <= $#infos; $i++) { my $individual = $infos[$i]; print $O " " . $individual; } print $O "\n"; } elsif ($num_line > 1) { my $snp = $infos[0]; my $variation = $infos[1]; my %scores; if ($variation =~/(\w)\/(\w)/) { my $allele1 = $1; my $allele2 = $2; $scores{$allele1} = 0; $scores{$allele2} = 1; } my $chrom = $infos[2]; my $pos = $infos[3]; $chrom_pos{$snp}{"chrom"} = $chrom; $chrom_pos{$snp}{"pos"} = $pos; push(@snps,$snp); print $O "$snp"; for (my $i = 11; $i <= $#infos; $i++) { my $genotype = $infos[$i]; my @alleles = split("",$genotype); if ($genotype ne "NN") { my $score = $scores{$alleles[0]} + $scores{$alleles[1]}; print $O " $score"; } else { print $O " NA"; } } print $O "\n"; } } close($H); close($O); open(my $M,">$map"); print $M "SNP Chr Pos\n"; foreach my $snp(@snps) { print $M "$snp " . $chrom_pos{$snp}{"chrom"} . " ". $chrom_pos{$snp}{"pos"} . "\n"; } close($M); system("gawk -E $TRANSPOSE_EXE geno_transposed >geno_transposed2"); open(my $F,">$geno"); open(my $G,"geno_transposed2"); while(<$G>) { my $line = $_; $line =~s/ /\t/g; print $F $line; } close($G); close($F); unlink("geno_transposed"); unlink("geno_transposed2");