Mercurial > repos > mcharles > rapsosnp
diff rapsodyn/ParseBlastForUniqueMatch.pl @ 7:3f7b0788a1c4 draft
Uploaded
author | mcharles |
---|---|
date | Tue, 07 Oct 2014 10:34:34 -0400 |
parents | 442a7c88b886 |
children | 0a6c1cfe4dc8 |
line wrap: on
line diff
--- a/rapsodyn/ParseBlastForUniqueMatch.pl Mon Sep 29 03:02:16 2014 -0400 +++ b/rapsodyn/ParseBlastForUniqueMatch.pl Tue Oct 07 10:34:34 2014 -0400 @@ -1,13 +1,26 @@ #!/usr/bin/perl +#V1.0.1 added log, option parameters use strict; use warnings; +use Getopt::Long; -my $input_variant_file = $ARGV[0]; -my $input_blast_file = $ARGV[1]; -my $window_length = $ARGV[2]; -my $nb_mismatch_max = $ARGV[3]; +my $input_variant_file; +my $input_blast_file; +my $log_file; +my $WINDOW_LENGTH= 50; +my $NB_MISMATCH_MAX = 3; +GetOptions ( +"input_variant_file=s" => \$input_variant_file, +"input_blast_file=s" => \$input_blast_file, +"window_length=s" => \$WINDOW_LENGTH, +"log_file=s" => \$log_file, +"nb_mismatch_max=s" => \$NB_MISMATCH_MAX +) or die("Error in command line arguments\n"); + +my $nb_variant_checked=0; +my $nb_variant_selected=0; my %hash_name; open(INB, $input_blast_file) or die ("Can't open $input_blast_file\n"); @@ -71,7 +84,7 @@ my $nb_gap_query=0; if (length($query_aln) == length($subject_aln)){ - if (length($query_aln)<$window_length-$nb_mismatch_max){ + if (length($query_aln)<$WINDOW_LENGTH-$NB_MISMATCH_MAX){ } else { my @q = split(//,$query_aln); @@ -79,27 +92,27 @@ for (my $i=0;$i<=$#q;$i++){ my $global_idx = $query_start-1+$i-$nb_gap_query; if ($q[$i] eq "-"){ - if ($global_idx < $window_length){ + if ($global_idx < $WINDOW_LENGTH){ $compt_mismatch_5p++; } - elsif ($global_idx > $window_length){ + elsif ($global_idx > $WINDOW_LENGTH){ $compt_mismatch_3p++; } $nb_gap_query++; #On compte les gap dans la query pour les soustraire de l'index global } else { if ($q[$i] ne $s[$i]){ - if ($global_idx < $window_length){ + if ($global_idx < $WINDOW_LENGTH){ $compt_mismatch_5p++; } - elsif ($global_idx > $window_length){ + elsif ($global_idx > $WINDOW_LENGTH){ $compt_mismatch_3p++; } } } } $compt_mismatch_5p += $query_start-1; - $compt_mismatch_3p += $window_length *2 + 1 - $query_stop; + $compt_mismatch_3p += $WINDOW_LENGTH *2 + 1 - $query_stop; # for (my $i=0;$i<$window_length;$i++){ # if ($tbl_q_aln[$i] eq "#"){ @@ -122,7 +135,7 @@ # else { # } # } - if (($compt_mismatch_5p <= $nb_mismatch_max)||($compt_mismatch_3p <= $nb_mismatch_max)){ + if (($compt_mismatch_5p <= $NB_MISMATCH_MAX)||($compt_mismatch_3p <= $NB_MISMATCH_MAX)){ $hash_name{$name}++; } @@ -151,6 +164,7 @@ open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); while (my $ligne = <INV>) { + $nb_variant_checked++; my @champs = split (/\s+/,$ligne); my $header = $champs[0]."_".$champs[1]; @@ -158,6 +172,7 @@ if ($hash_name{$header}){ if ($hash_name{$header}==1){ print $ligne; + $nb_variant_selected++; } } else { @@ -169,6 +184,12 @@ close(INV); +open (LF,">$log_file") or die("Can't open $log_file\n"); +print LF "\n####\t Blast filtering \n"; +print LF "Variant checked :\t$nb_variant_checked\n"; +print LF "Variant selected :\t$nb_variant_selected\n"; +close (LF); + # foreach my $key (sort keys %hash_name){ # print $key,"\t",$hash_name{$key},"\n";