0
|
1 #!/usr/bin/perl
|
7
|
2 #V1.0.1 added log, option parameters
|
0
|
3 use strict;
|
7
|
4 use warnings;
|
|
5 use Getopt::Long;
|
10
|
6 #v1.1.0 choose exclusion file
|
|
7 #v1.0.2 manage empty files
|
|
8 #v1.0.1 bug correction
|
|
9 #V1.0.1 added log, option parameters
|
0
|
10
|
7
|
11 my $input_pileup_file;
|
|
12 my $output_pileup_file;
|
10
|
13 my $input_exclusion_file;
|
7
|
14 my $log_file;
|
|
15
|
|
16 my $nb_base_covered=0;
|
|
17 my $nb_variant=0;
|
10
|
18 my $nb_variant_conserved=0;
|
|
19 my $do_exclusion=0;
|
|
20 my $nb_exclusion=0;
|
|
21 my $nb_excluded=0;
|
|
22 my %exclusion;
|
7
|
23 GetOptions (
|
|
24 "input_pileup_file=s" => \$input_pileup_file,
|
10
|
25 "input_exclusion_file=s" => \$input_exclusion_file,
|
7
|
26 "log_file=s" => \$log_file
|
|
27 ) or die("Error in command line arguments\n");
|
|
28
|
|
29 open(IN, $input_pileup_file) or die ("Can't open $input_pileup_file\n");
|
10
|
30 if ( -z IN){
|
|
31 exit(0);
|
|
32 }
|
|
33
|
|
34 if ($input_exclusion_file){
|
|
35 open (EX,$input_exclusion_file) or die ("Can't open $input_exclusion_file\n");
|
|
36 if ( -z EX){
|
|
37
|
|
38 }
|
|
39 else {
|
|
40 $do_exclusion=1;
|
|
41 while (my $line=<EX>){
|
|
42 $nb_exclusion++;
|
|
43 chomp($line);
|
|
44 my @fields = split(/\s+/,$line);
|
|
45 if (($fields[0])&&($fields[1])){
|
|
46 $exclusion{"$fields[0]\t$fields[1]"}=1;
|
|
47 }
|
|
48 else {
|
|
49 print STDERR "Error formatting in Exclusion File\nExclusion file lines should be 'chromosome number' TAB 'position'\n$line\n",$fields[0],"\n",$fields[1],"\n";
|
|
50 }
|
|
51 }
|
|
52
|
|
53 }
|
|
54 close (EX);
|
|
55 }
|
0
|
56
|
|
57 #Extraction des variants
|
|
58 my $nb_line=0;
|
7
|
59 while (my $line=<IN>){
|
|
60 #print $line;
|
10
|
61 if ($line !~ /^\s*$/){
|
|
62 my @fields = split(/\s+/,$line);
|
|
63 if ($fields[4]){
|
|
64 $nb_base_covered++;
|
|
65 my $pile = $fields[4];
|
|
66 $pile =~ s/\$//g; #the read start at this position
|
|
67 $pile =~ s/\^.//g; #the read end at this position followed by quality char
|
|
68 if ($fields[4]=~/[ATGCN]/i){ #Indel are +/-\d[ATGCatgc]+ and SNP are [ATGCatgc]+ so [ATGCN]/i detection cover indel and snp
|
|
69 $nb_variant++;
|
|
70 if (($do_exclusion==1)&&($exclusion{"$fields[0]\t$fields[1]"})){
|
|
71
|
|
72 }
|
|
73 else {
|
|
74 print $line;
|
|
75 $nb_variant_conserved++;
|
|
76 }
|
|
77 }
|
|
78 }
|
|
79 elsif ($fields[3]==0){
|
|
80 }
|
|
81 else {
|
|
82 print STDERR "Error in pileup format\nPileup result expected at column 5\n$line\n";
|
|
83 }
|
|
84 }
|
7
|
85 #print $line;
|
0
|
86
|
10
|
87
|
0
|
88 }
|
7
|
89 close(IN);
|
|
90
|
|
91 open (LF,">$log_file") or die("Can't open $log_file\n");
|
|
92 print LF "\n####\t Variant extraction \n";
|
10
|
93 print LF "Position covered\t:\t$nb_base_covered\n";
|
|
94 print LF "Variant detected\t:\t$nb_variant\n";
|
|
95 print LF "Variant conserved\t:\t$nb_variant_conserved\n";
|
7
|
96 close (LF);
|