Mercurial > repos > dereeper > sniplay
comparison GetHaplotypesFromPhasedVCF/GetHaplotypesFromPhasedVCF.pl @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | ebb0ac9b6fa9 |
children | 88748d846a20 |
comparison
equal
deleted
inserted
replaced
8:6bf69b40365c | 9:98c37a5d67f4 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 | |
5 my $vcf = $ARGV[0]; | |
6 my $out = $ARGV[1]; | |
7 | |
8 open(O1,">$out.haplo.fas"); | |
9 open(O2,">$out.distinct_haplotypes.txt"); | |
10 | |
11 my %indiv; | |
12 my %genes; | |
13 my $nb_cols = 0; | |
14 my %phasing; | |
15 open(my $V,$vcf); | |
16 while(<$V>){ | |
17 my $line = $_; | |
18 $line =~s/\n//g; | |
19 $line =~s/\r//g; | |
20 my @infos = split(/\t/,$line); | |
21 if (/#CHROM/){ | |
22 for (my $i = 9; $i <= $#infos; $i++){ | |
23 $indiv{$i} = $infos[$i]; | |
24 } | |
25 } | |
26 else{ | |
27 my $gene = $infos[0]; | |
28 my $ref = $infos[3]; | |
29 my $alt = $infos[4]; | |
30 $nb_cols = $#infos; | |
31 for (my $i = 9; $i <= $#infos; $i++){ | |
32 my ($alleles,$depth) = split(":",$infos[$i]); | |
33 $alleles =~s/0/$ref/g; | |
34 $alleles =~s/1/$alt/g; | |
35 my $ind = $indiv{$i}; | |
36 #if ($alleles =~/\// && $genes{$gene}){next;} | |
37 $phasing{$gene}{$i}.= $alleles ." "; | |
38 } | |
39 $genes{$gene} = 1; | |
40 } | |
41 } | |
42 close($V); | |
43 | |
44 my %haplos; | |
45 my $gene_ok = 0; | |
46 my $gene_shared = 0; | |
47 my $gene_all_shared = 0; | |
48 my $nb_gene = 0; | |
49 my %haplotypes2; | |
50 foreach my $gene(keys(%phasing)) | |
51 { | |
52 my $list = ""; | |
53 my $ok = 0; | |
54 my %haplotypes; | |
55 for (my $i=9;$i <= $nb_cols;$i++){ | |
56 | |
57 my $alleles = $phasing{$gene}{$i}; | |
58 if ($alleles eq "" or $alleles =~ /\./ or $alleles =~/ \w+\//){next;} | |
59 my @snps = split(" ",$alleles); | |
60 if (scalar @snps < 2){next;} | |
61 $alleles =~s/\//\|/g; | |
62 my @al = split(" ",$alleles); | |
63 my $haplo1 = ""; | |
64 my $haplo2 = ""; | |
65 foreach my $a(@al){ | |
66 my ($a1,$a2) = split(/\|/,$a); | |
67 $haplo1.= $a1; | |
68 $haplo2.= $a2; | |
69 } | |
70 $haplotypes{$haplo1}++; | |
71 $haplotypes{$haplo2}++; | |
72 my $ind = $indiv{$i}; | |
73 $haplotypes2{$gene}{$haplo1}.=$ind."_1,"; | |
74 $haplotypes2{$gene}{$haplo2}.=$ind."_2,"; | |
75 $haplos{$gene}{$haplo1}++; | |
76 $haplos{$gene}{$haplo2}++; | |
77 $list .= ">".$gene."_".$ind."_1\n$haplo1\n"; | |
78 $list .= ">".$gene."_".$ind."_2\n$haplo2\n"; | |
79 $ok++; | |
80 } | |
81 #print "$gene $nb_cols $ok\n"; | |
82 #if ($ok > 13 && $found_M1_M2 == 2){ | |
83 if ($ok == ($nb_cols - 8)){ | |
84 $nb_gene++; | |
85 print O1 $list; | |
86 } | |
87 } | |
88 foreach my $gene(sort {$a<=>$b} keys(%haplos)){ | |
89 print O2 "===$gene===\n"; | |
90 my $ref_hash = $haplos{$gene}; | |
91 my %hash = %$ref_hash; | |
92 my $num_haplo = 0; | |
93 foreach my $haplo(keys(%hash)){ | |
94 $num_haplo++; | |
95 my $haplo_name = "haplo".$num_haplo; | |
96 my $nb = $haplos{$gene}{$haplo}; | |
97 my $ind = $haplotypes2{$gene}{$haplo}; | |
98 print O2 $haplo_name.":$nb:".$haplotypes2{$gene}{$haplo}."\n".$haplo."\n"; | |
99 if ($nb > 1){ | |
100 #print "$nb \n"; | |
101 } | |
102 } | |
103 } | |
104 | |
105 close(O1); | |
106 close(O1); | |
107 #print scalar keys(%haplos); | |
108 |