Mercurial > repos > mcharles > rapsodyn
changeset 26:5c0e8f82bbbf draft
Uploaded
author | mcharles |
---|---|
date | Wed, 10 Sep 2014 05:14:53 -0400 |
parents | 39376c7204be |
children | 5b6c476166a7 |
files | rapsodyn/mpileupfilteronblastxml.pl rapsodyn/mpileupfilteronblastxml.xml |
diffstat | 2 files changed, 0 insertions(+), 308 deletions(-) [+] |
line wrap: on
line diff
--- a/rapsodyn/mpileupfilteronblastxml.pl Wed Sep 10 05:12:05 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,287 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; -use Getopt::Long; - -my $input_variant_file; -my $input_blastxml_file; -my $window_length = 50; -my $nb_mismatch_max = 2; - -GetOptions ( -"input_variant_file=s" => \$input_variant_file, -"input_blastxml_file=s" => \$input_blastxml_file, -"window_length=i" => \$window_length, -"nb_mismatch_max=i" => \$nb_mismatch_max -) or die("Error in command line arguments\n"); - - -open(INB, $input_blastxml_file) or die ("Can't open $input_blastxml_file\n"); - - -my $iteration_stop="</Iteration>"; -my $hit_stop="</Hit>"; -my $hsp_stop="</Hsp>"; -my $query_flag_start="<Iteration_query-def>"; -my $query_flag_stop="</Iteration_query-def>"; -my $subject_flag_start="<Hit_def>"; -my $subject_flag_stop="</Hit_def>"; -my $query_HSP_flag_from_start="<Hsp_query-from>"; -my $query_HSP_flag_from_stop="</Hsp_query-from>"; -my $query_HSP_flag_to_start="<Hsp_query-to>"; -my $query_HSP_flag_to_stop="</Hsp_query-to>"; -my $subject_HSP_flag_from_start="<Hsp_hit-from>"; -my $subject_HSP_flag_from_stop="</Hsp_hit-from>"; -my $subject_HSP_flag_to_start="<Hsp_hit-to>"; -my $subject_HSP_flag_to_stop="</Hsp_hit-to>"; -my $query_HSP_flag_seq_start="<Hsp_qseq>"; -my $query_HSP_flag_seq_stop="</Hsp_qseq>"; -my $subject_HSP_flag_seq_start="<Hsp_hseq>"; -my $subject_HSP_flag_seq_stop="</Hsp_hseq>"; -my $HSP_midline_flag_start="<Hsp_midline>"; -my $HSP_midline_flag_stop="</Hsp_midline>"; - -my %hash; - -my $compt=0; -my $current_query=""; -my $current_subject=""; -my $current_HSP_query_from; -my $current_HSP_query_to; -my $current_HSP_subject_from; -my $current_HSP_subject_to; -my $current_HSP_midline; -my $current_HSP_qseq; -my $current_HSP_hseq; -my $current_query_short; - -while (my $ligne = <INB>) { - - if ($ligne=~/$subject_flag_start(.*?)$subject_flag_stop/){ - $current_subject=$1; - #print "--",$1,"\n"; - } - - if ($ligne=~/$query_flag_start(.*?)$query_flag_stop/){ - $current_query=$1; - $current_query_short=$current_query; - if ($current_query =~ /^(.*?_\d+)\_/){ - $current_query_short=$1; - } - if (!$hash{$current_query_short}){ - $hash{$current_query_short}=0; - } - - - #print "--",$1,"\n"; - } - if ($ligne=~/$query_HSP_flag_from_start(.*?)$query_HSP_flag_from_stop/){ - $current_HSP_query_from=$1; - #print "--",$1,"..."; - } - if ($ligne=~/$query_HSP_flag_to_start(.*?)$query_HSP_flag_to_stop/){ - $current_HSP_query_to=$1; - #print $1,"\n"; - } - if ($ligne=~/$subject_HSP_flag_from_start(.*?)$subject_HSP_flag_from_stop/){ - $current_HSP_subject_from=$1; - #print "--",$1,"..."; - } - if ($ligne=~/$subject_HSP_flag_to_start(.*?)$subject_HSP_flag_to_stop/){ - $current_HSP_subject_to=$1; - #print $1,"\n"; - } - if ($ligne=~/$query_HSP_flag_seq_start(.*?)$query_HSP_flag_seq_stop/){ - $current_HSP_qseq=$1; - #print "--",$1,"\n"; - } - if ($ligne=~/$subject_HSP_flag_seq_start(.*?)$subject_HSP_flag_seq_stop/){ - $current_HSP_hseq=$1; - #print "--",$1,"\n"; - } - if ($ligne=~/$HSP_midline_flag_start(.*?)$HSP_midline_flag_stop/){ - $current_HSP_midline=$1; - #print "--",$1,"\n"; - } - - if ($ligne=~/$hsp_stop/){ - if ($current_HSP_query_from){ - #print "\ntest1\n"; - #print "Query : $current_query\n"; - #print "Subject : $current_subject\n"; - #print "$current_HSP_query_from ... $current_HSP_query_to\n"; - #print "$current_HSP_subject_from ... $current_HSP_subject_to\n"; - for (my $i=1;$i<$current_HSP_query_from;$i++){ - $current_HSP_qseq = "N".$current_HSP_qseq; - $current_HSP_midline = " ".$current_HSP_midline; - $current_HSP_hseq = "N".$current_HSP_hseq; - } - for (my $i=$current_HSP_query_to+1;$i<=$window_length*2+1;$i++){ - $current_HSP_qseq .= "N"; - $current_HSP_midline .= " "; - $current_HSP_hseq .= "N"; - } - - my @qseq = split(//,$current_HSP_qseq); - my @midline = split(//,$current_HSP_midline); - my @hseq = split(//,$current_HSP_hseq); - - my $comptbase=0; - my $compt5p=0; - my $compt3p=0; - for (my $i=0;$i<=$#qseq;$i++){ - if ($qseq[$i] ne "-"){ - $comptbase++; # Va de 1 -> $window_length *2 +1 - } - if ($midline[$i] eq " "){ - if ($comptbase<=$window_length){ #1 -> $window_length - $compt5p++; - } - elsif ($comptbase>=$window_length+2){ #$window_length+2 -> $window_length *2 + 1; - $compt3p++; - } - else { #+1-$window_length*2+1 - - } - } - } - if (($compt3p<=$nb_mismatch_max)||($compt5p<=$nb_mismatch_max)){ - $hash{$current_query_short}++; - } - - #print "$current_HSP_qseq\n"; - #print "$current_HSP_midline\n"; - #print "$current_HSP_hseq\n"; - #print "$compt5p // $compt3p\n"; - #print $hash{$current_query_short},"\n"; - - } - - undef $current_HSP_query_from; - undef $current_HSP_query_to; - undef $current_HSP_subject_from; - undef $current_HSP_subject_to; - $current_HSP_midline=""; - $current_HSP_qseq=""; - $current_HSP_hseq=""; - } - - if ($ligne=~/$iteration_stop/){ - if ($current_HSP_query_from){ - #print "\ntest2\n"; - #print "Query : $current_query\n"; - #print "Subject : $current_subject\n"; - #print "$current_HSP_query_from ... $current_HSP_query_to\n"; - #print "$current_HSP_subject_from ... $current_HSP_subject_to\n"; - for (my $i=1;$i<$current_HSP_query_from;$i++){ - $current_HSP_qseq = "N".$current_HSP_qseq; - $current_HSP_midline = " ".$current_HSP_midline; - $current_HSP_hseq = "N".$current_HSP_hseq; - } - for (my $i=$current_HSP_query_to+1;$i<=$window_length*2+1;$i++){ - $current_HSP_qseq .= "N"; - $current_HSP_midline .= " "; - $current_HSP_hseq .= "N"; - } - - my @qseq = split(//,$current_HSP_qseq); - my @midline = split(//,$current_HSP_midline); - my @hseq = split(//,$current_HSP_hseq); - - my $comptbase=0; - my $compt5p=0; - my $compt3p=0; - for (my $i=0;$i<=$#qseq;$i++){ - if ($qseq[$i] ne "-"){ - $comptbase++; # Va de 1 -> $window_length *2 +1 - } - if ($midline[$i] eq " "){ - if ($comptbase<=$window_length){ #1 -> $window_length - $compt5p++; - } - elsif ($comptbase>=$window_length+2){ #$window_length+2 -> $window_length *2 + 1; - $compt3p++; - } - else { #+1-$window_length*2+1 - - } - } - } - if (($compt3p<=$nb_mismatch_max)||($compt5p<=$nb_mismatch_max)){ - $hash{$current_query_short}++; - } - - #print "$current_HSP_qseq\n"; - #print "$current_HSP_midline\n"; - #print "$current_HSP_hseq\n"; - #print "$compt5p // $compt3p\n"; - #print $hash{$current_query_short},"\n"; - } - $current_query=""; - $current_query_short=""; - $current_subject=""; - undef $current_HSP_query_from; - undef $current_HSP_query_to; - undef $current_HSP_subject_from; - undef $current_HSP_subject_to; - $current_HSP_midline=""; - $current_HSP_qseq=""; - $current_HSP_hseq=""; - } - -} - -close (INB); - -# foreach my $key (sort trinombre keys %hash){ - # print $key," ",$hash{$key},"\n"; - -# } -# exit(0); - -open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); - -while (my $ligne = <INV>) { - my @champs = split (/\s+/,$ligne); - my $header = $champs[0]."_".$champs[1]; - - if ($hash{$header}){ - if ($hash{$header}==1){ - print "$ligne"; - } - else { - #print $hash{$header}," $ligne"; - } - } - else { - print STDERR "No blast result for ",$header,"\n"; - } - - -} - -close(INV); - - - -sub trinombre { - my $chra=$a; - my $posa=0; - my $chrb=$b; - my $posb=0; - - if ($a =~/(.*?)\_(\d+)/){ - $chra=$1; - $posa=$2; - } - if ($b =~/(.*?)\_(\d+)/){ - $chrb=$1; - $posb=$2; - } - - - $chra cmp $chrb - || - $posa <=> $posb; -} -
--- a/rapsodyn/mpileupfilteronblastxml.xml Wed Sep 10 05:12:05 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -<tool id="mpileupfilteronblastxml" name="mpileupfilteronblastxml" version="0.03"> -<description>Filter mpileup with blast results</description> -<command interpreter="perl"> - mpileupfilteronblastxml.pl -input_variant_file $input_variant_file -input_blastxml_file $input_blastxml_file -window_length $window_length -nb_mismatch_max $nb_mismatch_max > $output_file -</command> -<inputs> -<param name="input_variant_file" type="data" format="pileup" label="Select a suitable input VARIANT file from your history"/> -<param name="input_blastxml_file" type="data" format="xml" label="Select a suitable input BLASTXML file from your history"/> -<param name="window_length" type="integer" value="50" label="Number of bases extracted before and after the variant position"/> -<param name="nb_mismatch_max" type="integer" value="3" label="Threshold for mismatch filter"/> -</inputs> -<outputs> - <data name="output_file" format="pileup" label="${tool.name} on ${on_string}"/> -</outputs> - -<help> - - - -</help> -</tool>