comparison rapsodyn/extractseq.pl @ 0:442a7c88b886 draft

Uploaded
author mcharles
date Wed, 10 Sep 2014 09:18:15 -0400
parents
children 1776b8ddd87e
comparison
equal deleted inserted replaced
-1:000000000000 0:442a7c88b886
1 #!/usr/bin/perl
2 #V1.10
3
4 use strict;
5 use warnings;
6 use Getopt::Long;
7
8 my $input_variant_file;
9 my $input_assembly_file;
10 my $WINDOWS_LENGTH = 50;
11
12 GetOptions (
13 "input_variant_file=s" => \$input_variant_file,
14 "input_assembly_file=s" => \$input_assembly_file,
15 "window_length=i" => \$WINDOWS_LENGTH
16 ) or die("Error in command line arguments\n");
17
18 open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n");
19 open(INA, $input_assembly_file) or die ("Can't open $input_assembly_file\n");
20
21 my @variant_list;
22
23
24 ### Retrieving the assembly
25 my %genome;
26
27 my $current_header="";
28 my $current_seq="";
29 while (my $ligne = <INA>){
30 if ($ligne =~ /^\>(.*?)\s*$/){
31 if ($current_header){
32 $genome{$current_header} = $current_seq;
33 }
34 $current_header=$1;
35 $current_seq = "";
36 }
37 else {
38 if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){
39 $current_seq .= $1;
40 }
41 else {
42 print STDERR "Erreur Parsing n°2\n$ligne\n";
43 }
44 }
45 }
46 #TRAITEMENT DU DERNIER
47 if ($current_header){
48 $genome{$current_header} = $current_seq;
49 undef($current_seq);
50 }
51 close (INA);
52
53
54 ### Retrieving the variant
55 while (my $ligne=<INV>){
56 if ($ligne !~ /^\s*$/){
57 my %variant;
58 my @fields = split (/\s+/,$ligne);
59 $variant{"ref"}=$fields[0];
60 $variant{"position"}=$fields[1];
61 $variant{"baseref"}=$fields[2];
62 $variant{"depth"}=$fields[3];
63 $variant{"pileup"}=$fields[4];
64
65
66 my $start = &max($variant{"position"} - $WINDOWS_LENGTH,1);
67 my $stop = &min ($variant{"position"} + $WINDOWS_LENGTH,length($genome{$variant{"ref"}}));
68 my $length = $stop-$start+1;
69
70 #print $variant{"position"}," / ",length($genome{$variant{"ref"}})," / ","$start / $stop / $length \n";
71
72 $variant{"SEQ"} = substr $genome{$variant{"ref"}},$start-1,$length;
73
74 my $pileup = $variant{"pileup"};
75 $pileup =~ s/\$//g; #the read start at this position
76 $pileup =~ s/\^.//g; #the read end at this position
77 my $descriptor = $variant{"position"}."_".$variant{"depth"}."_";
78 if ($pileup=~/\+([0-9]+)([ACGTNacgtn]+)/){
79 $descriptor .="I".$1."_".$2;
80 }
81 elsif ($pileup=~/\-([0-9]+)([ACGTNacgtn]+)/){
82 $descriptor .="D".$1."_".$2;
83 }
84 elsif ($pileup=~/([ACGTNacgtn])/){
85 $descriptor.="M1"."_".$1;
86 }
87 else {
88 $descriptor.="?_?";
89 }
90 $variant{"desc"}=$descriptor;
91
92 print ">",$variant{"ref"},"_",$descriptor,"\n",$variant{"SEQ"},"\n";
93
94
95
96 #print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n";
97
98 push(@variant_list,\%variant);
99 }
100 }
101 close (INV);
102
103
104
105
106
107
108
109
110
111 #***********
112 sub min{
113 my $first = shift;
114 my $second = shift;
115 if ($first <= $second){
116 return $first;
117 }
118 else {
119 return $second;
120 }
121 }
122
123 sub max {
124 my $first = shift;
125 my $second = shift;
126 if ($first >= $second){
127 return $first;
128 }
129 else {
130 return $second;
131 }
132 }