view Tree/Interval.pm @ 1:4f6952e0af48 default tip

CREST - add crest.loc.sample
author Jim Johnson <jj@umn.edu>
date Wed, 08 Feb 2012 16:08:01 -0600
parents acc8d8bfeb9a
children
line wrap: on
line source

package Tree::Interval;

use strict;
use Carp;

use Tree::Interval::Node qw[set_color color_of parent_of left_of right_of];
use Tree::Interval::Node::Constants;
use Tree::DAG_Node;
use vars qw( $VERSION @EXPORT_OK );
$VERSION = 0.1;

require Exporter;
*import    = \&Exporter::import;
@EXPORT_OK = qw[LUEQUAL LUGTEQ LULTEQ LUGREAT LULESS LUNEXT LUPREV];

use enum qw{ LUEQUAL  LUGTEQ  LULTEQ LUGREAT LULESS LUNEXT LUPREV };

# object slots
use enum qw{ ROOT CMP SIZE };

sub new {
    my ($class, $cmp) = @_;
    my $obj = [];
    $obj->[SIZE] = 0;
    if($cmp) {
        ref $cmp eq 'CODE'
          or croak('Invalid arg: codref expected');
        $obj->[CMP] = $cmp;
    }
	else {
		# default compare
		$obj->[CMP] = sub { $_[0]->[0] <=> $_[1]->[0] || $_[0]->[1] <=> $_[1]->[1]};
	}
    return bless $obj => $class;
}


sub DESTROY { $_[0]->[ROOT]->DESTROY if $_[0]->[ROOT] }

sub root { $_[0]->[ROOT] }
sub size { $_[0]->[SIZE] }

sub left_most {
    my $self = shift;
    return undef unless $self->[ROOT];
    return $self->[ROOT]->left_most;
}

sub right_most {
    my $self = shift;
    return undef unless $self->[ROOT];
    return $self->[ROOT]->right_most;
}

# return all the intervals intersect with the interval
sub intersect {
	my $self = shift;
	my $interval = shift;
	return my @tmp  unless $self->[ROOT];
	return $self->[ROOT]->intersect($interval);
}

sub lookup {
    my $self = shift;
    my $key  = shift;
    defined $key
      or croak("Can't use undefined value as key");
    my $mode = shift || LUEQUAL;
    my $cmp = $self->[CMP];

    my $y;
    my $x = $self->[ROOT]
      or return;
    my $next_child;
    while($x) {
        $y = $x;
        if($cmp ? $cmp->($key, $x->[_KEY]) == 0
                : $key eq $x->[_KEY]) {
            # found it!
            if($mode == LUGREAT || $mode == LUNEXT) {
                $x = $x->successor;
            }
            elsif($mode == LULESS || $mode == LUPREV) {
                $x = $x->predecessor;
            }
            return wantarray
              ? ($x->[_VAL], $x)
              : $x->[_VAL];
        }
        if($cmp ? $cmp->($key, $x->[_KEY]) < 0
                : $key lt $x->[_KEY]) {
            $next_child = _LEFT;
        }
        else {
            $next_child = _RIGHT;
        }
        $x = $x->[$next_child];
    }
    # Didn't find it :(
    if($mode == LUGTEQ || $mode == LUGREAT) {
        if($next_child == _LEFT) {
            return wantarray ? ($y->[_VAL], $y) : $y->[_VAL];
        }
        else {
            my $next = $y->successor
              or return;
            return wantarray ? ($next->[_VAL], $next) : $next->[_VAL];
        }
    }
    elsif($mode == LULTEQ || $mode == LULESS) {
        if($next_child == _RIGHT) {
            return wantarray ? ($y->[_VAL], $y) : $y->[_VAL];
        }
        else {
            my $next = $y->predecessor
              or return;
            return wantarray ? ($next->[_VAL], $next) : $next->[_VAL];
        }
    }
    return;
}

sub insert {
    my $self = shift;
    my $key_or_node = shift;
    defined $key_or_node
      or croak("Can't use undefined value as key or node");
    my $val = shift;

    my $cmp = $self->[CMP];
    my $z = (ref $key_or_node eq 'Tree::Interval::Node')
              ? $key_or_node
              : Tree::Interval::Node->new($key_or_node => $val);

    my $y;
    my $x = $self->[ROOT];
    while($x) {
        $y = $x;
        # Handle case of inserting node with duplicate key.
        if($cmp ? $cmp->($z->[_KEY], $x->[_KEY]) == 0
                : $z->[_KEY] eq $x->[_KEY])
        {
			warn "The same key (range) is already in the tree
				it will be replaced!";
            my $old_val = $x->[_VAL];
            $x->[_VAL] = $z->[_VAL];
            return $old_val;
        }

        if($cmp ? $cmp->($z->[_KEY], $x->[_KEY]) < 0
                : $z->[_KEY] lt $x->[_KEY])
        {
            $x = $x->[_LEFT];
        }
        else {
            $x = $x->[_RIGHT];
        }
    }
    # insert new node
    $z->[_PARENT] = $y;
    if(not defined $y) {
        $self->[ROOT] = $z;
    }
    else {
        if($cmp ? $cmp->($z->[_KEY], $y->[_KEY]) < 0
                : $z->[_KEY] lt $y->[_KEY])
        {
            $y->[_LEFT] = $z;
        }
        else {
            $y->[_RIGHT] = $z;
        }
    }
	_update_max($z);
    $self->_fix_after_insertion($z);
    $self->[SIZE]++;
}

sub _fix_after_insertion {
    my $self = shift;
    my $x = shift or croak('Missing arg: node');

    $x->[_COLOR] = RED;
    while($x != $self->[ROOT] && $x->[_PARENT][_COLOR] == RED) {
        my ($child, $rotate1, $rotate2);
        if(($x->[_PARENT] || 0) == ($x->[_PARENT][_PARENT][_LEFT] || 0)) {
            ($child, $rotate1, $rotate2) = (_RIGHT, '_left_rotate', '_right_rotate');
        }
        else {
            ($child, $rotate1, $rotate2) = (_LEFT, '_right_rotate', '_left_rotate');
        }
        my $y = $x->[_PARENT][_PARENT][$child];

        if($y && $y->[_COLOR] == RED) {
            $x->[_PARENT][_COLOR] = BLACK;
            $y->[_COLOR] = BLACK;
            $x->[_PARENT][_PARENT][_COLOR] = RED;
            $x = $x->[_PARENT][_PARENT];
        }
        else {
            if($x == ($x->[_PARENT][$child] || 0)) {
                $x = $x->[_PARENT];
                $self->$rotate1($x);
            }
            $x->[_PARENT][_COLOR] = BLACK;
            $x->[_PARENT][_PARENT][_COLOR] = RED;
            $self->$rotate2($x->[_PARENT][_PARENT]);
        }
    }
    $self->[ROOT][_COLOR] = BLACK;
}

sub delete {
    my ($self, $key_or_node) = @_;
    defined $key_or_node
      or croak("Can't use undefined value as key or node");

    my $z = (ref $key_or_node eq 'Tree::Interval::Node')
              ? $key_or_node
              : ($self->lookup($key_or_node))[1];
    return unless $z;

    my $y;
    if($z->[_LEFT] && $z->[_RIGHT]) {
        # (Notes kindly provided by Christopher Gurnee)
        # When deleting a node 'z' which has two children from a binary search tree, the
        # typical algorithm is to delete the successor node 'y' instead (which is
        # guaranteed to have at most one child), and then to overwrite the key/values of
        # node 'z' (which is still in the tree) with the key/values (which we don't want
        # to lose) from the now-deleted successor node 'y'.

        # Since we need to return the deleted item, it's not good enough to overwrite the
        # key/values of node 'z' with those of node 'y'. Instead we swap them so we can
        # return the deleted values.

        $y = $z->predecessor;
        ($z->[_KEY], $y->[_KEY]) = ($y->[_KEY], $z->[_KEY]);
        ($z->[_VAL], $y->[_VAL]) = ($y->[_VAL], $z->[_VAL]);
        ($z->[_INTERVAL], $y->[_INTERVAL]) = ($y->[_INTERVAL], $z->[_INTERVAL]);
    }
    else {
        $y = $z;
    }
	
    # splice out $y
    my $x = $y->[_LEFT] || $y->[_RIGHT];
    if(defined $x) {
        $x->[_PARENT] = $y->[_PARENT];
        if(! defined $y->[_PARENT]) {
            $self->[ROOT] = $x;
        }
        elsif($y == $y->[_PARENT][_LEFT]) {
            $y->[_PARENT][_LEFT] = $x;
        }
        else {
            $y->[_PARENT][_RIGHT] = $x;
        }
        # Null out links so they are OK to use by _fix_after_deletion
        delete @{$y}[_PARENT, _LEFT, _RIGHT];
		_update_max($x);
        # Fix replacement
        if($y->[_COLOR] == BLACK) {
            $self->_fix_after_deletion($x);
        }
    }
    elsif(! defined $y->[_PARENT]) {
        # return if we are the only node
        delete $self->[ROOT];
    }
    else {
        # No children. Use self as phantom replacement and unlink
        if($y->[_COLOR] == BLACK) {
            $self->_fix_after_deletion($y);
        }
        if(defined $y->[_PARENT]) {
            no warnings 'uninitialized';
            if($y == $y->[_PARENT][_LEFT]) {
                delete $y->[_PARENT][_LEFT];
            }
            elsif($y == $y->[_PARENT][_RIGHT]) {
                delete $y->[_PARENT][_RIGHT];
            }
			my $tmp = $y->[_PARENT];
            delete $y->[_PARENT];
			_update_max($tmp);
        }
    }
    $self->[SIZE]--;
    return $y;
}

sub _fix_after_deletion {
    my $self = shift;
    my $x = shift or croak('Missing arg: node');

    while($x != $self->[ROOT] && color_of($x) == BLACK) {
        my ($child1, $child2, $rotate1, $rotate2);
        no warnings 'uninitialized';
        if($x == left_of(parent_of($x))) {
            ($child1,    $child2,   $rotate1,       $rotate2) =
            (\&right_of, \&left_of, '_left_rotate', '_right_rotate');
        }
        else {
            ($child1,   $child2,    $rotate1,        $rotate2) =
            (\&left_of, \&right_of, '_right_rotate', '_left_rotate');
        }
        use warnings;

        my $w = $child1->(parent_of($x));
        if(color_of($w) == RED) {
            set_color($w, BLACK);
            set_color(parent_of($x), RED);
            $self->$rotate1(parent_of($x));
            $w = right_of(parent_of($x));
        }
        if(color_of($child2->($w)) == BLACK &&
           color_of($child1->($w)) == BLACK) {
            set_color($w, RED);
            $x = parent_of($x);
        }
        else {
            if(color_of($child1->($w)) == BLACK) {
                set_color($child2->($w), BLACK);
                set_color($w, RED);
                $self->$rotate2($w);
                $w = $child1->(parent_of($x));
            }
            set_color($w, color_of(parent_of($x)));
            set_color(parent_of($x), BLACK);
            set_color($child1->($w), BLACK);
            $self->$rotate1(parent_of($x));
            $x = $self->[ROOT];
        }
    }
    set_color($x, BLACK);
}


sub _max3 {
	my ($a, $b, $c) = @_;
	my $min = ($a || $b || $c) - 1;
	$a = $a || $min;
	$b = $b || $min;
	$c = $c || $min;
	return $a if($a >= $b && $a >= $c);
	return $b if($b >= $a && $b >= $c);
	return $c if($c >= $a && $c >= $b);
}

sub _max {
	my $x = shift;
	my $tmp = _max3($x->[_INTERVAL][1], 
		$x->[_LEFT] ? $x->[_LEFT][_MAX] : undef, 
		$x->[_RIGHT] ? $x->[_RIGHT][_MAX] : undef);
	$x->[_MAX] = $tmp;
	return $tmp;
}

sub _update_max {
	my $x = shift;
	#update the max field for each parent node
	_max($x);
	my $k = $x->[_PARENT];
	while($k) {
		my $tmp = $k->[_MAX];
		_max($k);
		last if($tmp == $k->[_MAX]); # no need to update further
		$k = $k->[_PARENT];
	}
}

sub _left_rotate {
    my $self = shift;
    my $x = shift or croak('Missing arg: node');

    my $y = $x->[_RIGHT]
      or return;
    $x->[_RIGHT] = $y->[_LEFT];
    if($y->[_LEFT]) {
        $y->[_LEFT]->[_PARENT] = $x;
    }
    $y->[_PARENT] = $x->[_PARENT];
    if(not defined $x->[_PARENT]) {
        $self->[ROOT] = $y;
    }
    else {
        $x == $x->[_PARENT]->[_LEFT]
          ? $x->[_PARENT]->[_LEFT]  = $y
          : $x->[_PARENT]->[_RIGHT] = $y;
    }
    $y->[_LEFT]   = $x;
    $x->[_PARENT] = $y;
	#update max
	_max($x); _max($y);
}

sub _right_rotate {
    my $self = shift;
    my $y = shift or croak('Missing arg: node');

    my $x = $y->[_LEFT]
      or return;
    $y->[_LEFT] = $x->[_RIGHT];
    if($x->[_RIGHT]) {
        $x->[_RIGHT]->[_PARENT] = $y
    }
    $x->[_PARENT] = $y->[_PARENT];
    if(not defined $y->[_PARENT]) {
        $self->[ROOT] = $x;
    }
    else {
        $y == $y->[_PARENT]->[_RIGHT]
          ? $y->[_PARENT]->[_RIGHT] = $x
          : $y->[_PARENT]->[_LEFT]  = $x;
    }
    $x->[_RIGHT] = $y;
    $y->[_PARENT] = $x;
	_max($y); _max($x);
}

1; 

# Magic true value required at end of module
__END__

=head1 NAME

Tree::RB - Perl implementation of the Red/Black tree, a type of balanced binary search tree. 


=head1 VERSION

This document describes Tree::RB version 0.1


=head1 SYNOPSIS

    use Tree::RB;

    my $tree = Tree::RB->new;
    $tree->put('France'  => 'Paris');
    $tree->put('England' => 'London');
    $tree->put('Hungary' => 'Budapest');
    $tree->put('Ireland' => 'Dublin');
    $tree->put('Egypt'   => 'Cairo');
    $tree->put('Germany' => 'Berlin');

    $tree->put('Alaska' => 'Anchorage'); # D'oh!
    $tree->delete('Alaska');

    print $tree->get('Ireland'); # 'Dublin'

    print $tree->min->key; # 'Egypt' 
    print $tree->max->key; # 'Ireland' 
    print $tree->size; # 6

    # print items, ordered by key
    my $it = $tree->iter;

    while(my $node = $it->next) {
        sprintf "key = %s, value = %s\n", $node->key, $node->val;
    }

    # print items in reverse order
    $it = $tree->rev_iter;

    while(my $node = $it->next) {
        sprintf "key = %s, value = %s\n", $node->key, $node->val;
    }

    # Hash interface
    tie my %capital, 'Tree::RB';

    # or do this to store items in descending order 
    tie my %capital, 'Tree::RB', sub { $_[1] cmp $_[0] };

    $capital{'France'}  = 'Paris';
    $capital{'England'} = 'London';
    $capital{'Hungary'} = 'Budapest';
    $capital{'Ireland'} = 'Dublin';
    $capital{'Egypt'}   = 'Cairo';
    $capital{'Germany'} = 'Berlin';

    # print items in order
    while(my ($key, $val) = each %capital) {
        printf "key = $key, value = $val\n";
    }

=head1 DESCRIPTION

This is a Perl implementation of the Red/Black tree, a type of balanced binary search tree. 

A tied hash interface is also provided to allow ordered hashes to be used.

See the Wikipedia article at L<http://en.wikipedia.org/wiki/Red-black_tree> for more on Red/Black trees.


=head1 INTERFACE

=head2 new([CODEREF])

Creates and returns a new tree. If a reference to a subroutine is passed to
new(), the subroutine will be used to override the tree's default lexical
ordering and provide a user a defined ordering. 

This subroutine should be just like a comparator subroutine used with L<sort>, 
except that it doesn't do the $a, $b trick.

For example, to get a case insensitive ordering

    my $tree = Tree::RB->new(sub { lc $_[0] cmp lc $_[1]});
    $tree->put('Wall'  => 'Larry');
    $tree->put('Smith' => 'Agent');
    $tree->put('mouse' => 'micky');
    $tree->put('duck'  => 'donald');

    my $it = $tree->iter;

    while(my $node = $it->next) {
        sprintf "key = %s, value = %s\n", $node->key, $node->val;
    }

=head2 resort(CODEREF)

Changes the ordering of nodes within the tree. The new ordering is
specified by a comparator subroutine which must be passed to resort().

See L</new> for further information about the comparator.

=head2 size()

Returns the number of nodes in the tree.

=head2 root()

Returns the root node of the tree. This will either be undef
if no nodes have been added to the tree, or a L<Tree::RB::Node> object.
See the L<Tree::RB::Node> manual page for details on the Node object.

=head2 min()

Returns the node with the minimal key.

=head2 max()

Returns the node with the maximal key.

=head2 lookup(KEY, [MODE])

When called in scalar context, lookup(KEY) returns the value
associated with KEY.

When called in list context, lookup(KEY) returns a list whose first
element is the value associated with KEY, and whose second element
is the node containing the key/value.

An optional MODE parameter can be passed to lookup() to influence
which key is returned.

The values of MODE are constants that are exported on demand by
Tree::RB

    use Tree::RB qw[LUEQUAL LUGTEQ LULTEQ LUGREAT LULESS LUNEXT LUPREV];

=over

=item LUEQUAL

Returns node exactly matching the key.

=item LUGTEQ

Returns the node exactly matching the specified key, 
if this is not found then the next node that is greater than the specified key is returned.

=item LULTEQ

Returns the node exactly matching the specified key, 
if this is not found then the next node that is less than the specified key is returned.

=item LUGREAT

Returns the node that is just greater than the specified key - not equal to. 
This mode is similar to LUNEXT except that the specified key need not exist in the tree.

=item LULESS

Returns the node that is just less than the specified key - not equal to. 
This mode is similar to LUPREV except that the specified key need not exist in the tree.

=item LUNEXT

Looks for the key specified, if not found returns C<undef>. 
If the node is found returns the next node that is greater than 
the one found (or C<undef> if there is no next node). 

This can be used to step through the tree in order.

=item LUPREV

Looks for the key specified, if not found returns C<undef>. 
If the node is found returns the previous node that is less than 
the one found (or C<undef> if there is no previous node). 

This can be used to step through the tree in reverse order.

=back

=head2 get(KEY)

get() is an alias for lookup().

=head2 iter([KEY])

Returns an iterator object that can be used to traverse the tree in order.

The iterator object supports a 'next' method that returns the next node in the
tree or undef if all of the nodes have been visited.

See the synopsis for an example.

If a key is supplied, the iterator returned will traverse the tree in order starting from
the node with key greater than or equal to the specified key.

    $it = $tree->iter('France');
    my $node = $it->next;
    print $node->key; # -> 'France'

=head2 rev_iter([KEY])

Returns an iterator object that can be used to traverse the tree in reverse order.

If a key is supplied, the iterator returned will traverse the tree in order starting from
the node with key less than or equal to the specified key.

    $it = $tree->rev_iter('France');
    my $node = $it->next;
    print $node->key; # -> 'England'

=head2 hseek(KEY, [{-reverse => 1|0}])

For tied hashes, determines the next entry to be returned by each.

    tie my %capital, 'Tree::RB';

    $capital{'France'}  = 'Paris';
    $capital{'England'} = 'London';
    $capital{'Hungary'} = 'Budapest';
    $capital{'Ireland'} = 'Dublin';
    $capital{'Egypt'}   = 'Cairo';
    $capital{'Germany'} = 'Berlin';
    tied(%capital)->hseek('Germany');

    ($key, $val) = each %capital;
    print "$key, $val"; # -> Germany, Berlin 

The direction of iteration can be reversed by passing a hashref with key '-reverse' and value 1
to hseek after or instead of KEY, e.g. to iterate over the hash in reverse order:

    tied(%capital)->hseek({-reverse => 1});
    $key = each %capital;
    print $key; # -> Ireland 

The following calls are equivalent

    tied(%capital)->hseek('Germany', {-reverse => 1});
    tied(%capital)->hseek({-key => 'Germany', -reverse => 1});

=head2 put(KEY, VALUE)

Adds a new node to the tree. 

The first argument is the key of the node, the second is its value. 

If a node with that key already exists, its value is replaced with 
the given value and the old value is returned. Otherwise, undef is returned.

=head2 delete(KEY)

If the tree has a node with the specified key, that node is
deleted from the tree and returned, otherwise C<undef> is returned.


=head1 DEPENDENCIES

L<enum>


=head1 INCOMPATIBILITIES

None reported.


=head1 BUGS AND LIMITATIONS

Please report any bugs or feature requests to
C<bug-tree-rb@rt.cpan.org>, or through the web interface at
L<http://rt.cpan.org>.


=head1 AUTHOR

Arun Prasad  C<< <arunbear@cpan.org> >>

Some documentation has been borrowed from Benjamin Holzman's L<Tree::RedBlack>
and Damian Ivereigh's libredblack (L<http://libredblack.sourceforge.net/>).

=head1 ACKNOWLEDGEMENTS

Thanks for bug reports go to Anton Petrusevich, Wes Thompson and Christopher Gurnee.

=head1 LICENCE AND COPYRIGHT

Copyright (c) 2007, Arun Prasad C<< <arunbear@cpan.org> >>. All rights reserved.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlartistic>.


=head1 DISCLAIMER OF WARRANTY

BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
NECESSARY SERVICING, REPAIR, OR CORRECTION.

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.