0
|
1 #!/usr/bin/perl
|
7
|
2 #V1.01 #Ajout d'un _ a la fin du nom pour eviter les problemes avec ncbi blast+
|
0
|
3
|
|
4 use strict;
|
|
5 use warnings;
|
|
6 use Getopt::Long;
|
|
7
|
|
8 my $input_variant_file;
|
|
9 my $input_assembly_file;
|
|
10 my $WINDOWS_LENGTH = 50;
|
|
11
|
|
12 GetOptions (
|
|
13 "input_variant_file=s" => \$input_variant_file,
|
|
14 "input_assembly_file=s" => \$input_assembly_file,
|
|
15 "window_length=i" => \$WINDOWS_LENGTH
|
|
16 ) or die("Error in command line arguments\n");
|
|
17
|
|
18 open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n");
|
|
19 open(INA, $input_assembly_file) or die ("Can't open $input_assembly_file\n");
|
|
20
|
|
21 my @variant_list;
|
|
22
|
|
23
|
|
24 ### Retrieving the assembly
|
|
25 my %genome;
|
|
26
|
|
27 my $current_header="";
|
|
28 my $current_seq="";
|
|
29 while (my $ligne = <INA>){
|
|
30 if ($ligne =~ /^\>(.*?)\s*$/){
|
|
31 if ($current_header){
|
|
32 $genome{$current_header} = $current_seq;
|
|
33 }
|
|
34 $current_header=$1;
|
|
35 $current_seq = "";
|
|
36 }
|
|
37 else {
|
|
38 if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){
|
|
39 $current_seq .= $1;
|
|
40 }
|
|
41 else {
|
|
42 print STDERR "Erreur Parsing n°2\n$ligne\n";
|
|
43 }
|
|
44 }
|
|
45 }
|
|
46 #TRAITEMENT DU DERNIER
|
|
47 if ($current_header){
|
|
48 $genome{$current_header} = $current_seq;
|
|
49 undef($current_seq);
|
|
50 }
|
|
51 close (INA);
|
|
52
|
|
53
|
|
54 ### Retrieving the variant
|
|
55 while (my $ligne=<INV>){
|
|
56 if ($ligne !~ /^\s*$/){
|
|
57 my %variant;
|
|
58 my @fields = split (/\s+/,$ligne);
|
|
59 $variant{"ref"}=$fields[0];
|
|
60 $variant{"position"}=$fields[1];
|
|
61 $variant{"baseref"}=$fields[2];
|
|
62 $variant{"depth"}=$fields[3];
|
|
63 $variant{"pileup"}=$fields[4];
|
|
64
|
|
65
|
|
66 my $start = &max($variant{"position"} - $WINDOWS_LENGTH,1);
|
|
67 my $stop = &min ($variant{"position"} + $WINDOWS_LENGTH,length($genome{$variant{"ref"}}));
|
|
68 my $length = $stop-$start+1;
|
|
69
|
|
70 #print $variant{"position"}," / ",length($genome{$variant{"ref"}})," / ","$start / $stop / $length \n";
|
|
71
|
|
72 $variant{"SEQ"} = substr $genome{$variant{"ref"}},$start-1,$length;
|
|
73
|
|
74 my $pileup = $variant{"pileup"};
|
|
75 $pileup =~ s/\$//g; #the read start at this position
|
|
76 $pileup =~ s/\^.//g; #the read end at this position
|
|
77 my $descriptor = $variant{"position"}."_".$variant{"depth"}."_";
|
|
78 if ($pileup=~/\+([0-9]+)([ACGTNacgtn]+)/){
|
|
79 $descriptor .="I".$1."_".$2;
|
|
80 }
|
|
81 elsif ($pileup=~/\-([0-9]+)([ACGTNacgtn]+)/){
|
|
82 $descriptor .="D".$1."_".$2;
|
|
83 }
|
|
84 elsif ($pileup=~/([ACGTNacgtn])/){
|
|
85 $descriptor.="M1"."_".$1;
|
|
86 }
|
|
87 else {
|
|
88 $descriptor.="?_?";
|
|
89 }
|
|
90 $variant{"desc"}=$descriptor;
|
|
91
|
9
|
92 print ">",$variant{"ref"},"_",$descriptor,"_","\n",$variant{"SEQ"},"\n";
|
0
|
93
|
|
94
|
|
95
|
|
96 #print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n";
|
|
97
|
|
98 push(@variant_list,\%variant);
|
|
99 }
|
|
100 }
|
|
101 close (INV);
|
|
102
|
|
103
|
|
104
|
|
105
|
|
106
|
|
107
|
|
108
|
|
109
|
|
110
|
|
111 #***********
|
|
112 sub min{
|
|
113 my $first = shift;
|
|
114 my $second = shift;
|
|
115 if ($first <= $second){
|
|
116 return $first;
|
|
117 }
|
|
118 else {
|
|
119 return $second;
|
|
120 }
|
|
121 }
|
|
122
|
|
123 sub max {
|
|
124 my $first = shift;
|
|
125 my $second = shift;
|
|
126 if ($first >= $second){
|
|
127 return $first;
|
|
128 }
|
|
129 else {
|
|
130 return $second;
|
|
131 }
|
6
|
132 }
|