view SCValidator.pm @ 0:acc8d8bfeb9a

Uploaded
author jjohnson
date Wed, 08 Feb 2012 16:59:24 -0500
parents
children
line wrap: on
line source

package SCValidator;
# this is a singleton class

use strict;
use Exporter 'import';
our @EXPORT_OK = qw($lowqual_cutoff $min_percent_id $min_percent_hq LEFT_CLIP RIGHT_CLIP);
use Bio::DB::Sam;
use constant LEFT_CLIP => 0;
use constant RIGHT_CLIP => 1;

our $min_percent_hq = 80;
our $lowqual_cutoff = 20;
our $min_percent_id = 90;

my $SCValidator;
my %default_validators = ( 
	"quality_validator" => \&_quality_validator,
	"align_validator"	=> \&_align_validator,
	"strand_validator"	=> \&_strand_validator,
);

sub new {
	if( $SCValidator) {
		return $SCValidator;
	} else {
		my $type = shift;
		my $this = {
			"validator" => {}	
		};
		foreach my $v ( keys(%default_validators)) {
			$this->{validator}{$v} = $default_validators{$v};
		}
		$SCValidator =  bless $this, $type;
	}
}

sub add_validator {
	my $self = shift;
	my $name = shift;
	$name = lc $name;
	if( exists($self->{validator}{$name})) {
#		print STDERR "Validator: $name is already in the list";
		return;
	}
	if( exists($default_validators{$name})) {
		$self->{validator}{$name} = $default_validators{$name};
		return;
	}
	my $validator = shift;
	die "Validator must be coderef" if(ref($validator) ne "CODE");
	$self->{validator}{$name} = $validator;
}

sub remove_validator {
	my ($self, $name) = @_;
	$name = lc $name;
	if(exists($self->{validator}{$name})) {
		delete $self->{validator}{$name};
	}
}

sub validate {
	my $self = shift;
	my $a = shift;
	my $clip = shift;
	die "The method will only validate bam alignment object"
		if( ref($a) ne "Bio::DB::Bam::Aligment" && 
			ref($a) ne "Bio::DB::Bam::AlignWrapper" );
	return undef if($a->unmapped);
	foreach my $v (keys(%{$self->{"validator"}})) {
		return undef if(!$self->{"validator"}{$v}->($a, $clip));
	}
	return 1;
}

sub _align_validator {
	#we only consider high quality part of the alignment
	my $a = shift;
	my ($ref, $matches, $query) = $a->padded_alignment;
	my @qscores = $a->qscore;
	my $n_matches = 0;
	my $align_len = 0;
	my $j = 0;
	my ($start, $end) = (0, length($ref)-1);
	my @cigar_array = @{ $a->cigar_array };
	$start = $cigar_array[0]->[1] if($cigar_array[0]->[0] eq 'S');
	$end = $end - $cigar_array[$#cigar_array]->[1] if($cigar_array[$#cigar_array]->[0] eq 'S');
	
	for( my $i = $start; $i <= $end; $i++) {
		my $m = substr($matches, $i, 1);
		if($m ne '|') {
			if( substr($query, $i, 1) ne '-') {
		#		$align_len++ if($qscores[$j] >= $lowqual_cutoff);
				$j++;
			}
		#	else {
		#		$align_len++ if($qscores[$j] >= $lowqual_cutoff && $qscores[$j+1] >= $lowqual_cutoff);
		#	}
			next;
		}
		if($qscores[$j] >= $lowqual_cutoff) {
			$n_matches++ if(substr($ref, $i, 1) eq substr($query, $i, 1));
			$align_len++;
		}
		$j++;
	}
	return ($align_len - $n_matches > 1) ? undef : 1 if($align_len < 20);
	return ($n_matches * 100 >= $align_len * $min_percent_id) ? 1 : undef;
}

sub _quality_validator {
	my $a = shift;
	my $clip = shift;
#	my $cigar_str = $a->cigar_str;
	my @cigar_array = @{$a->cigar_array};
	my @qual = $a->qscore;
	if($clip == LEFT_CLIP) {
		#my($sclip_len) = $cigar_str =~ m/^S(\d+)/;
		my $sclip_len = $cigar_array[0]->[1];
		@qual = @qual[0 .. ($sclip_len - 1)];
	}
	elsif ($clip == RIGHT_CLIP) {
		#my($sclip_len) = $cigar_str =~ m/S(\d+)$/;
		my $sclip_len = $cigar_array[$#cigar_array]->[1];
		@qual = @qual[($a->l_qseq - $sclip_len) .. ($a->l_qseq - 1)];
	}
	else {
		print STDERR "Please specify the soft_clipping position:
			0: LEFT_CLIP or 1: RIGHT_CLIP";
	}

	my $n_hq = 0;
	for( my $i = 0; $i <= $#qual; $i++) {
		$n_hq++ if($qual[$i] >= $lowqual_cutoff);
	}
	return ($n_hq * 100 > $#qual * $min_percent_hq) ? 1 : undef;
}

sub _strand_validator {
	my $a = shift;
	my $clip = shift;
	if($clip == LEFT_CLIP ) {
		return ($a->paired && ($a->mtid != $a->tid || $a->mate_start < $a->start)) ? undef : 1;
	}
	elsif($clip == RIGHT_CLIP) {
		return ($a->paired && ($a->mtid != $a->tid || $a->mate_start > $a->end)) ? undef : 1;
	}
	else {
		 print STDERR "Please specify the soft_clipping position:
             0: LEFT_CLIP or 1: RIGHT_CLIP";
	}
}

1;