3
|
1 #!/usr/bin/perl
|
|
2
|
|
3 use strict;
|
|
4
|
|
5 my $nb_file_found = 0;
|
|
6 open(LS,"ls *Nfowleri.snp |");
|
|
7 while(<LS>){
|
|
8 $nb_file_found++;
|
|
9 }
|
|
10 close(LS);
|
|
11 if ($nb_file_found < 1){
|
|
12 print "No file found with extension \".Nfowleri.snp\". This script allows to merge multiple SNP files (named as follows \"strainname.Nfowleri.snp\") obtained with VarScan into a global VCF file.\n";
|
|
13 exit;
|
|
14 }
|
|
15
|
|
16
|
|
17 print "##fileformat=VCFv4.1\n";
|
|
18 print "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ";
|
|
19
|
|
20 my %corr;
|
|
21 open(F,"/media/results_datacalcul/Alexis/Naegleria/raw/SRR.info.xls");
|
|
22 while(<F>){
|
|
23 my $line = $_;
|
|
24 $line =~s/\n//g;$line =~s/\r//g;
|
|
25 my ($srr,$name) = split(/\t/,$line);
|
|
26 #$corr{$srr} = $name;
|
|
27 }
|
|
28 close(F);
|
|
29
|
|
30 my %str;
|
|
31 my @strains;
|
|
32 my %hash;
|
|
33 my %genotypes;
|
|
34 my %alternates;
|
|
35 my %reference_bases;
|
|
36 open(LS,"ls *Nfowleri.snp |");
|
|
37 #open(LS,"ls *Nlovaniensis.snp |");
|
|
38 #open(LS,"ls ../../tuberculosis/VCF_comparaison_avec_reference/*vcf |");
|
|
39 while(<LS>){
|
|
40 my $file = $_;
|
|
41 $file =~s/\n//g;$file =~s/\r//g;
|
|
42 my $strain;
|
|
43 if ($file =~/^(.*).Nfowleri.snp/){
|
|
44 #if ($file =~/^(.*).Nlovaniensis.snp/){
|
|
45 #if ($file =~/\/([^\/]*)\.vcf/){
|
|
46 $strain = $1;
|
|
47 $strain =~s/_/-/g;
|
|
48 #if ($strain =~/SRR12781212/ or $strain =~/SRR12781213/ or $strain =~/SRR12781214/ or $strain =~/SRR12781215/ or $strain =~/SRR12781216/){next;}
|
|
49 #if ($strain !~/NF/){next;}
|
|
50 push(@strains,$strain);
|
|
51 $str{$strain} = 1;
|
|
52 #open(F,"$strain.Klebsiella_pneumoniae_subsp._pneumoniae_HS11286.snp");
|
|
53 open(F,$file);
|
|
54 <F>;
|
|
55 while(<F>){
|
|
56 my $line = $_;
|
|
57 $line =~s/\n//g;$line =~s/\r//g;
|
|
58 if (/^#/){next;}
|
|
59 my ($chr,$pos,$ref,$alt,$info) = split("\t",$line);
|
|
60 my ($Cons,$Cov,$Reads1,$Reads2,$Freq) = split(":",$info);
|
|
61 $Freq =~s/%//g;$Freq =~s/,/./g;
|
|
62 $chr =~s/\.1//g;
|
|
63 $alternates{$chr}{$pos}{$alt}=1;
|
|
64 $reference_bases{$chr}{$pos} = $ref;
|
|
65 if ($Cov > 6 && $Freq > 70){
|
|
66 $hash{$chr}{$pos}{$strain} = "1/1";
|
|
67 }
|
|
68 elsif ($Cov > 6 && $Freq > 34){
|
|
69 $hash{$chr}{$pos}{$strain} = "0/1";
|
|
70 }
|
|
71 else{
|
|
72 #$hash{$chr}{$pos}{$strain} = "0/0";
|
|
73 }
|
|
74 }
|
|
75 close(F);
|
|
76 }
|
|
77 }
|
|
78 close(LS);
|
|
79
|
|
80 my %depths;
|
|
81 open(LS,"ls *.depth |");
|
|
82 while(<LS>){
|
|
83 my $file = $_;
|
|
84 $file =~s/\n//g;$file =~s/\r//g;
|
|
85 my $strain;
|
|
86 if ($file =~/^(.*).Nfowleri.depth/){
|
|
87 #if ($file =~/^(.*).Nlovaniensis.depth/){
|
|
88 $strain = $1;
|
|
89 $strain =~s/_/-/g;
|
|
90 if (!$str{$strain}){next;}
|
|
91 #if ($strain !~/NF/){next;}
|
|
92 open(F,$file);
|
|
93 while(<F>){
|
|
94 my $line = $_;
|
|
95 $line =~s/\n//g;$line =~s/\r//g;
|
|
96 my ($chr,$pos,$depth) = split("\t",$line);
|
|
97 $chr =~s/\.1//g;
|
|
98 if ($hash{$chr}{$pos}){
|
|
99 $depths{$chr}{$pos}{$strain}=1;
|
|
100 }
|
|
101 }
|
|
102 close(F);
|
|
103 }
|
|
104 }
|
|
105 close(LS);
|
|
106
|
|
107 foreach my $strain(@strains){
|
|
108 if ($corr{$strain}){
|
|
109 $strain= $corr{$strain};
|
|
110 }
|
|
111 print "$strain\t";
|
|
112 }
|
|
113 print "Reference\n";
|
|
114 foreach my $chr(sort {$a<=>$b} keys(%hash)){
|
|
115 my $refsubhash = $hash{$chr};
|
|
116 my %subhash2 = %$refsubhash;
|
|
117 foreach my $pos(sort {$a<=>$b} keys(%subhash2)){
|
|
118 my $refbase = $reference_bases{$chr}{$pos};
|
|
119 my $ref_alt = $alternates{$chr}{$pos};
|
|
120 my %subhash_alt = %$ref_alt;
|
|
121 my $alternate;
|
|
122
|
|
123 # discard multiallelic
|
|
124 if (scalar keys(%subhash_alt) > 1){
|
|
125 next;
|
|
126 }
|
|
127 else{
|
|
128 foreach my $alt(keys(%subhash_alt)){
|
|
129 $alternate = $alt;
|
|
130 }
|
|
131 }
|
|
132
|
|
133 print "$chr $pos . $refbase $alternate 186.96 . PASS GT:AD:DP:GQ:PL";
|
|
134 foreach my $strain(@strains){
|
|
135 my $val = $hash{$chr}{$pos}{$strain};
|
|
136 if ($val){
|
|
137 print " $val";
|
|
138 }
|
|
139 else{
|
|
140 if ($depths{$chr}{$pos}{$strain}){
|
|
141 print " 0/0";
|
|
142 }
|
|
143 else{
|
|
144 print " ./.";
|
|
145 }
|
|
146 }
|
|
147
|
|
148 }
|
|
149 print " 0/0\n";
|
|
150 }
|
|
151 }
|
|
152
|
|
153
|