Mercurial > repos > mcharles > rapsosnp
comparison rapsodyn/extractseq.pl @ 0:442a7c88b886 draft
Uploaded
author | mcharles |
---|---|
date | Wed, 10 Sep 2014 09:18:15 -0400 |
parents | |
children | 1776b8ddd87e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:442a7c88b886 |
---|---|
1 #!/usr/bin/perl | |
2 #V1.10 | |
3 | |
4 use strict; | |
5 use warnings; | |
6 use Getopt::Long; | |
7 | |
8 my $input_variant_file; | |
9 my $input_assembly_file; | |
10 my $WINDOWS_LENGTH = 50; | |
11 | |
12 GetOptions ( | |
13 "input_variant_file=s" => \$input_variant_file, | |
14 "input_assembly_file=s" => \$input_assembly_file, | |
15 "window_length=i" => \$WINDOWS_LENGTH | |
16 ) or die("Error in command line arguments\n"); | |
17 | |
18 open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); | |
19 open(INA, $input_assembly_file) or die ("Can't open $input_assembly_file\n"); | |
20 | |
21 my @variant_list; | |
22 | |
23 | |
24 ### Retrieving the assembly | |
25 my %genome; | |
26 | |
27 my $current_header=""; | |
28 my $current_seq=""; | |
29 while (my $ligne = <INA>){ | |
30 if ($ligne =~ /^\>(.*?)\s*$/){ | |
31 if ($current_header){ | |
32 $genome{$current_header} = $current_seq; | |
33 } | |
34 $current_header=$1; | |
35 $current_seq = ""; | |
36 } | |
37 else { | |
38 if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){ | |
39 $current_seq .= $1; | |
40 } | |
41 else { | |
42 print STDERR "Erreur Parsing n°2\n$ligne\n"; | |
43 } | |
44 } | |
45 } | |
46 #TRAITEMENT DU DERNIER | |
47 if ($current_header){ | |
48 $genome{$current_header} = $current_seq; | |
49 undef($current_seq); | |
50 } | |
51 close (INA); | |
52 | |
53 | |
54 ### Retrieving the variant | |
55 while (my $ligne=<INV>){ | |
56 if ($ligne !~ /^\s*$/){ | |
57 my %variant; | |
58 my @fields = split (/\s+/,$ligne); | |
59 $variant{"ref"}=$fields[0]; | |
60 $variant{"position"}=$fields[1]; | |
61 $variant{"baseref"}=$fields[2]; | |
62 $variant{"depth"}=$fields[3]; | |
63 $variant{"pileup"}=$fields[4]; | |
64 | |
65 | |
66 my $start = &max($variant{"position"} - $WINDOWS_LENGTH,1); | |
67 my $stop = &min ($variant{"position"} + $WINDOWS_LENGTH,length($genome{$variant{"ref"}})); | |
68 my $length = $stop-$start+1; | |
69 | |
70 #print $variant{"position"}," / ",length($genome{$variant{"ref"}})," / ","$start / $stop / $length \n"; | |
71 | |
72 $variant{"SEQ"} = substr $genome{$variant{"ref"}},$start-1,$length; | |
73 | |
74 my $pileup = $variant{"pileup"}; | |
75 $pileup =~ s/\$//g; #the read start at this position | |
76 $pileup =~ s/\^.//g; #the read end at this position | |
77 my $descriptor = $variant{"position"}."_".$variant{"depth"}."_"; | |
78 if ($pileup=~/\+([0-9]+)([ACGTNacgtn]+)/){ | |
79 $descriptor .="I".$1."_".$2; | |
80 } | |
81 elsif ($pileup=~/\-([0-9]+)([ACGTNacgtn]+)/){ | |
82 $descriptor .="D".$1."_".$2; | |
83 } | |
84 elsif ($pileup=~/([ACGTNacgtn])/){ | |
85 $descriptor.="M1"."_".$1; | |
86 } | |
87 else { | |
88 $descriptor.="?_?"; | |
89 } | |
90 $variant{"desc"}=$descriptor; | |
91 | |
92 print ">",$variant{"ref"},"_",$descriptor,"\n",$variant{"SEQ"},"\n"; | |
93 | |
94 | |
95 | |
96 #print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n"; | |
97 | |
98 push(@variant_list,\%variant); | |
99 } | |
100 } | |
101 close (INV); | |
102 | |
103 | |
104 | |
105 | |
106 | |
107 | |
108 | |
109 | |
110 | |
111 #*********** | |
112 sub min{ | |
113 my $first = shift; | |
114 my $second = shift; | |
115 if ($first <= $second){ | |
116 return $first; | |
117 } | |
118 else { | |
119 return $second; | |
120 } | |
121 } | |
122 | |
123 sub max { | |
124 my $first = shift; | |
125 my $second = shift; | |
126 if ($first >= $second){ | |
127 return $first; | |
128 } | |
129 else { | |
130 return $second; | |
131 } | |
132 } |