# HG changeset patch
# User mcharles
# Date 1410340493 14400
# Node ID 5c0e8f82bbbf8cbac5d563dabd384dba133347f0
# Parent 39376c7204be22aa04642905befe6058dedac6ca
Uploaded
diff -r 39376c7204be -r 5c0e8f82bbbf rapsodyn/mpileupfilteronblastxml.pl
--- a/rapsodyn/mpileupfilteronblastxml.pl Wed Sep 10 05:12:05 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,287 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-use Getopt::Long;
-
-my $input_variant_file;
-my $input_blastxml_file;
-my $window_length = 50;
-my $nb_mismatch_max = 2;
-
-GetOptions (
-"input_variant_file=s" => \$input_variant_file,
-"input_blastxml_file=s" => \$input_blastxml_file,
-"window_length=i" => \$window_length,
-"nb_mismatch_max=i" => \$nb_mismatch_max
-) or die("Error in command line arguments\n");
-
-
-open(INB, $input_blastxml_file) or die ("Can't open $input_blastxml_file\n");
-
-
-my $iteration_stop="";
-my $hit_stop="";
-my $hsp_stop="";
-my $query_flag_start="";
-my $query_flag_stop="";
-my $subject_flag_start="";
-my $subject_flag_stop="";
-my $query_HSP_flag_from_start="";
-my $query_HSP_flag_from_stop="";
-my $query_HSP_flag_to_start="";
-my $query_HSP_flag_to_stop="";
-my $subject_HSP_flag_from_start="";
-my $subject_HSP_flag_from_stop="";
-my $subject_HSP_flag_to_start="";
-my $subject_HSP_flag_to_stop="";
-my $query_HSP_flag_seq_start="";
-my $query_HSP_flag_seq_stop="";
-my $subject_HSP_flag_seq_start="";
-my $subject_HSP_flag_seq_stop="";
-my $HSP_midline_flag_start="";
-my $HSP_midline_flag_stop="";
-
-my %hash;
-
-my $compt=0;
-my $current_query="";
-my $current_subject="";
-my $current_HSP_query_from;
-my $current_HSP_query_to;
-my $current_HSP_subject_from;
-my $current_HSP_subject_to;
-my $current_HSP_midline;
-my $current_HSP_qseq;
-my $current_HSP_hseq;
-my $current_query_short;
-
-while (my $ligne = ) {
-
- if ($ligne=~/$subject_flag_start(.*?)$subject_flag_stop/){
- $current_subject=$1;
- #print "--",$1,"\n";
- }
-
- if ($ligne=~/$query_flag_start(.*?)$query_flag_stop/){
- $current_query=$1;
- $current_query_short=$current_query;
- if ($current_query =~ /^(.*?_\d+)\_/){
- $current_query_short=$1;
- }
- if (!$hash{$current_query_short}){
- $hash{$current_query_short}=0;
- }
-
-
- #print "--",$1,"\n";
- }
- if ($ligne=~/$query_HSP_flag_from_start(.*?)$query_HSP_flag_from_stop/){
- $current_HSP_query_from=$1;
- #print "--",$1,"...";
- }
- if ($ligne=~/$query_HSP_flag_to_start(.*?)$query_HSP_flag_to_stop/){
- $current_HSP_query_to=$1;
- #print $1,"\n";
- }
- if ($ligne=~/$subject_HSP_flag_from_start(.*?)$subject_HSP_flag_from_stop/){
- $current_HSP_subject_from=$1;
- #print "--",$1,"...";
- }
- if ($ligne=~/$subject_HSP_flag_to_start(.*?)$subject_HSP_flag_to_stop/){
- $current_HSP_subject_to=$1;
- #print $1,"\n";
- }
- if ($ligne=~/$query_HSP_flag_seq_start(.*?)$query_HSP_flag_seq_stop/){
- $current_HSP_qseq=$1;
- #print "--",$1,"\n";
- }
- if ($ligne=~/$subject_HSP_flag_seq_start(.*?)$subject_HSP_flag_seq_stop/){
- $current_HSP_hseq=$1;
- #print "--",$1,"\n";
- }
- if ($ligne=~/$HSP_midline_flag_start(.*?)$HSP_midline_flag_stop/){
- $current_HSP_midline=$1;
- #print "--",$1,"\n";
- }
-
- if ($ligne=~/$hsp_stop/){
- if ($current_HSP_query_from){
- #print "\ntest1\n";
- #print "Query : $current_query\n";
- #print "Subject : $current_subject\n";
- #print "$current_HSP_query_from ... $current_HSP_query_to\n";
- #print "$current_HSP_subject_from ... $current_HSP_subject_to\n";
- for (my $i=1;$i<$current_HSP_query_from;$i++){
- $current_HSP_qseq = "N".$current_HSP_qseq;
- $current_HSP_midline = " ".$current_HSP_midline;
- $current_HSP_hseq = "N".$current_HSP_hseq;
- }
- for (my $i=$current_HSP_query_to+1;$i<=$window_length*2+1;$i++){
- $current_HSP_qseq .= "N";
- $current_HSP_midline .= " ";
- $current_HSP_hseq .= "N";
- }
-
- my @qseq = split(//,$current_HSP_qseq);
- my @midline = split(//,$current_HSP_midline);
- my @hseq = split(//,$current_HSP_hseq);
-
- my $comptbase=0;
- my $compt5p=0;
- my $compt3p=0;
- for (my $i=0;$i<=$#qseq;$i++){
- if ($qseq[$i] ne "-"){
- $comptbase++; # Va de 1 -> $window_length *2 +1
- }
- if ($midline[$i] eq " "){
- if ($comptbase<=$window_length){ #1 -> $window_length
- $compt5p++;
- }
- elsif ($comptbase>=$window_length+2){ #$window_length+2 -> $window_length *2 + 1;
- $compt3p++;
- }
- else { #+1-$window_length*2+1
-
- }
- }
- }
- if (($compt3p<=$nb_mismatch_max)||($compt5p<=$nb_mismatch_max)){
- $hash{$current_query_short}++;
- }
-
- #print "$current_HSP_qseq\n";
- #print "$current_HSP_midline\n";
- #print "$current_HSP_hseq\n";
- #print "$compt5p // $compt3p\n";
- #print $hash{$current_query_short},"\n";
-
- }
-
- undef $current_HSP_query_from;
- undef $current_HSP_query_to;
- undef $current_HSP_subject_from;
- undef $current_HSP_subject_to;
- $current_HSP_midline="";
- $current_HSP_qseq="";
- $current_HSP_hseq="";
- }
-
- if ($ligne=~/$iteration_stop/){
- if ($current_HSP_query_from){
- #print "\ntest2\n";
- #print "Query : $current_query\n";
- #print "Subject : $current_subject\n";
- #print "$current_HSP_query_from ... $current_HSP_query_to\n";
- #print "$current_HSP_subject_from ... $current_HSP_subject_to\n";
- for (my $i=1;$i<$current_HSP_query_from;$i++){
- $current_HSP_qseq = "N".$current_HSP_qseq;
- $current_HSP_midline = " ".$current_HSP_midline;
- $current_HSP_hseq = "N".$current_HSP_hseq;
- }
- for (my $i=$current_HSP_query_to+1;$i<=$window_length*2+1;$i++){
- $current_HSP_qseq .= "N";
- $current_HSP_midline .= " ";
- $current_HSP_hseq .= "N";
- }
-
- my @qseq = split(//,$current_HSP_qseq);
- my @midline = split(//,$current_HSP_midline);
- my @hseq = split(//,$current_HSP_hseq);
-
- my $comptbase=0;
- my $compt5p=0;
- my $compt3p=0;
- for (my $i=0;$i<=$#qseq;$i++){
- if ($qseq[$i] ne "-"){
- $comptbase++; # Va de 1 -> $window_length *2 +1
- }
- if ($midline[$i] eq " "){
- if ($comptbase<=$window_length){ #1 -> $window_length
- $compt5p++;
- }
- elsif ($comptbase>=$window_length+2){ #$window_length+2 -> $window_length *2 + 1;
- $compt3p++;
- }
- else { #+1-$window_length*2+1
-
- }
- }
- }
- if (($compt3p<=$nb_mismatch_max)||($compt5p<=$nb_mismatch_max)){
- $hash{$current_query_short}++;
- }
-
- #print "$current_HSP_qseq\n";
- #print "$current_HSP_midline\n";
- #print "$current_HSP_hseq\n";
- #print "$compt5p // $compt3p\n";
- #print $hash{$current_query_short},"\n";
- }
- $current_query="";
- $current_query_short="";
- $current_subject="";
- undef $current_HSP_query_from;
- undef $current_HSP_query_to;
- undef $current_HSP_subject_from;
- undef $current_HSP_subject_to;
- $current_HSP_midline="";
- $current_HSP_qseq="";
- $current_HSP_hseq="";
- }
-
-}
-
-close (INB);
-
-# foreach my $key (sort trinombre keys %hash){
- # print $key," ",$hash{$key},"\n";
-
-# }
-# exit(0);
-
-open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n");
-
-while (my $ligne = ) {
- my @champs = split (/\s+/,$ligne);
- my $header = $champs[0]."_".$champs[1];
-
- if ($hash{$header}){
- if ($hash{$header}==1){
- print "$ligne";
- }
- else {
- #print $hash{$header}," $ligne";
- }
- }
- else {
- print STDERR "No blast result for ",$header,"\n";
- }
-
-
-}
-
-close(INV);
-
-
-
-sub trinombre {
- my $chra=$a;
- my $posa=0;
- my $chrb=$b;
- my $posb=0;
-
- if ($a =~/(.*?)\_(\d+)/){
- $chra=$1;
- $posa=$2;
- }
- if ($b =~/(.*?)\_(\d+)/){
- $chrb=$1;
- $posb=$2;
- }
-
-
- $chra cmp $chrb
- ||
- $posa <=> $posb;
-}
-
diff -r 39376c7204be -r 5c0e8f82bbbf rapsodyn/mpileupfilteronblastxml.xml
--- a/rapsodyn/mpileupfilteronblastxml.xml Wed Sep 10 05:12:05 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-
-Filter mpileup with blast results
-
- mpileupfilteronblastxml.pl -input_variant_file $input_variant_file -input_blastxml_file $input_blastxml_file -window_length $window_length -nb_mismatch_max $nb_mismatch_max > $output_file
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-