Mercurial > repos > mcharles > rapsosnp
view rapsodyn/extractseq.pl @ 0:442a7c88b886 draft
Uploaded
author | mcharles |
---|---|
date | Wed, 10 Sep 2014 09:18:15 -0400 |
parents | |
children | 1776b8ddd87e |
line wrap: on
line source
#!/usr/bin/perl #V1.10 use strict; use warnings; use Getopt::Long; my $input_variant_file; my $input_assembly_file; my $WINDOWS_LENGTH = 50; GetOptions ( "input_variant_file=s" => \$input_variant_file, "input_assembly_file=s" => \$input_assembly_file, "window_length=i" => \$WINDOWS_LENGTH ) or die("Error in command line arguments\n"); open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); open(INA, $input_assembly_file) or die ("Can't open $input_assembly_file\n"); my @variant_list; ### Retrieving the assembly my %genome; my $current_header=""; my $current_seq=""; while (my $ligne = <INA>){ if ($ligne =~ /^\>(.*?)\s*$/){ if ($current_header){ $genome{$current_header} = $current_seq; } $current_header=$1; $current_seq = ""; } else { if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){ $current_seq .= $1; } else { print STDERR "Erreur Parsing n°2\n$ligne\n"; } } } #TRAITEMENT DU DERNIER if ($current_header){ $genome{$current_header} = $current_seq; undef($current_seq); } close (INA); ### Retrieving the variant while (my $ligne=<INV>){ if ($ligne !~ /^\s*$/){ my %variant; my @fields = split (/\s+/,$ligne); $variant{"ref"}=$fields[0]; $variant{"position"}=$fields[1]; $variant{"baseref"}=$fields[2]; $variant{"depth"}=$fields[3]; $variant{"pileup"}=$fields[4]; my $start = &max($variant{"position"} - $WINDOWS_LENGTH,1); my $stop = &min ($variant{"position"} + $WINDOWS_LENGTH,length($genome{$variant{"ref"}})); my $length = $stop-$start+1; #print $variant{"position"}," / ",length($genome{$variant{"ref"}})," / ","$start / $stop / $length \n"; $variant{"SEQ"} = substr $genome{$variant{"ref"}},$start-1,$length; my $pileup = $variant{"pileup"}; $pileup =~ s/\$//g; #the read start at this position $pileup =~ s/\^.//g; #the read end at this position my $descriptor = $variant{"position"}."_".$variant{"depth"}."_"; if ($pileup=~/\+([0-9]+)([ACGTNacgtn]+)/){ $descriptor .="I".$1."_".$2; } elsif ($pileup=~/\-([0-9]+)([ACGTNacgtn]+)/){ $descriptor .="D".$1."_".$2; } elsif ($pileup=~/([ACGTNacgtn])/){ $descriptor.="M1"."_".$1; } else { $descriptor.="?_?"; } $variant{"desc"}=$descriptor; print ">",$variant{"ref"},"_",$descriptor,"\n",$variant{"SEQ"},"\n"; #print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n"; push(@variant_list,\%variant); } } close (INV); #*********** sub min{ my $first = shift; my $second = shift; if ($first <= $second){ return $first; } else { return $second; } } sub max { my $first = shift; my $second = shift; if ($first >= $second){ return $first; } else { return $second; } }