use Bio::EnsEMBL::Mapper::RangeRegistry;
# Usage example: create an empty registry.
$rr = Bio::EnsEMBL::Mapper::RangeRegistry->new();

# Get a fixed-width chunk around the range of interest.  This will be
# used if any registration is actually necessary.
#
# Chunks are 2**20 wide and aligned as [ k * 2**20 + 1, ( k + 1 ) * 2**20 ].
# The explicit parentheses matter: "+" binds tighter than "<<", so an
# unparenthesised "<< 20 + 1" shifts by 21 bits and can yield a chunk
# start beyond $start, which check_and_register() rejects.  Subtracting 1
# before shifting keeps a coordinate that sits exactly on a chunk
# boundary inside its own chunk.
# NOTE(review): assumes 1-based coordinates ($start >= 1) -- confirm.
$chunk_start = ( ( ( $start - 1 ) >> 20 ) << 20 ) + 1;
$chunk_end   = ( ( ( $end - 1 ) >> 20 ) + 1 ) << 20;

# Check if any registration is necessary for the range.  If it is,
# register a large chunked area instead and return a listref of
# unregistered areas that need to be loaded.
if (
    $pairs = $rr->check_and_register(
        $id, $start, $end, $chunk_start, $chunk_end
    ) )
{
    foreach my $pair (@$pairs) {
        my ( $pair_start, $pair_end ) = @$pair;

        # Fetch mappings for these regions from the assembly table and
        # load them into the mapper.
        ...;
    }
} else {

    # The range ($start - $end) is already registered.
    ...;
}

# Check if any registration is necessary.  If it is, register the
# region and return a listref of pairs that need to be loaded.
if ( $pairs = $rr->check_and_register( $id, $start, $end ) ) {
    ...;
}
# check_and_register( $id, $start, $end [, $rstart, $rend ] )
#
# Checks whether the range $start-$end is already registered under $id
# and, if it is not, registers the (possibly larger) range $rstart-$rend.
#
#   $id      - key of the per-ID range list kept in $self->{'registry'}
#   $start   - start of the range that is actually needed
#   $end     - end of the range that is actually needed
#   $rstart  - optional start of the range to register (default: $start)
#   $rend    - optional end of the range to register (default: $end)
#
# Returns undef, without registering anything, when one of the examined
# registered ranges already contains $start-$end.  Otherwise the registry
# is updated and a listref of [ start, end ] pairs is returned, holding
# the gaps found inside $rstart-$rend between the examined ranges.
#
# Calls throw() when $id, $start or $end is undefined, when $start > $end,
# when $rstart > $rend, or when $rstart-$rend does not enclose $start-$end.
sub check_and_register {
    my ( $self, $id, $start, $end, $rstart, $rend ) = @_;

    if ( !defined($id) || !defined($start) || !defined($end) ) {
        throw("ID, start, end arguments are required");
    }

    # The range to register defaults to the requested range itself.
    $rstart = $start if ( !defined($rstart) );
    $rend   = $end   if ( !defined($rend) );

    if ( $start > $end ) {
        throw("start argument must be less than end argument");
    }
    if ( $rstart > $rend ) {
        throw(
            "rstart [$rstart] argument must be less than rend [$rend] argument"
        );
    }
    if ( $rstart > $start ) {
        throw("rstart must be less than or equal to start");
    }
    if ( $rend < $end ) {
        throw("rend must be greater than or equal to end");
    }

    my $reg  = $self->{'registry'};
    my $list = $reg->{$id} ||= [];
    my @gap_pairs;
    my $len = scalar(@$list);

    # Nothing registered for this ID yet: the whole range is one gap.
    if ( $len == 0 ) {
        $list->[0] = [ $rstart, $rend ];
        return [ [ $rstart, $rend ] ];
    }

    # Binary search to find where the linear scan below should begin.
    # This relies on the list being ordered by position, which the
    # splice/push logic at the end of this sub maintains.
    my $start_idx = 0;
    my $end_idx   = $#$list;
    my ( $mid_idx, $range );
    while ( ( $end_idx - $start_idx ) > 1 ) {
        $mid_idx = ( $start_idx + $end_idx ) >> 1;
        $range   = $list->[$mid_idx];
        if ( $range->[1] < $rstart ) {
            $start_idx = $mid_idx;
        } else {
            $end_idx = $mid_idx;
        }
    }

    my ( $gap_start, $gap_end, $r_idx, $rstart_idx, $rend_idx );
    $gap_start = $rstart;

    for ( my $CUR = $start_idx ; $CUR < $len ; $CUR++ ) {
        my ( $pstart, $pend ) = @{ $list->[$CUR] };

        # The requested range is completely covered by one registered
        # range: nothing to do.  An explicit undef (rather than a bare
        # return) is kept so list-context callers see the same value.
        if ( $pstart <= $start && $pend >= $end ) {
            return undef;
        }

        # Remember the first and last registered ranges that overlap, or
        # are directly adjacent to, the range being registered; they are
        # merged with it below.
        if ( $pend >= ( $rstart - 1 ) && $pstart <= ( $rend + 1 ) ) {
            if ( !defined($rstart_idx) ) {
                $rstart_idx = $CUR;
            }
            $rend_idx = $CUR;
        }

        # Record the unregistered stretch in front of this range,
        # clipped to the end of the range being registered.
        if ( $pstart > $rstart ) {
            $gap_end = ( $rend < $pstart ) ? $rend : $pstart - 1;
            push @gap_pairs, [ $gap_start, $gap_end ];
        }

        # The next gap can only start after this registered range.
        $gap_start = ( $rstart > $pend ) ? $rstart : $pend + 1;

        # This range reaches the end of the range being registered, so
        # later ranges cannot matter.  Its index doubles as the insert
        # position when nothing needs merging.
        if ( $pend >= $rend && !defined($r_idx) ) {
            $r_idx = $CUR;
            last;
        }
    }

    # Trailing gap after the last examined range.
    if ( $gap_start <= $rend ) {
        push @gap_pairs, [ $gap_start, $rend ];
    }

    if ( defined($rstart_idx) ) {

        # Replace every overlapping/adjacent range with a single range
        # spanning all of them plus the newly registered range.
        my ( $new_start, $new_end );
        if ( $rstart < $list->[$rstart_idx]->[0] ) {
            $new_start = $rstart;
        } else {
            $new_start = $list->[$rstart_idx]->[0];
        }
        if ( $rend > $list->[$rend_idx]->[1] ) {
            $new_end = $rend;
        } else {
            $new_end = $list->[$rend_idx]->[1];
        }
        splice( @$list, $rstart_idx,
                $rend_idx - $rstart_idx + 1,
                [ $new_start, $new_end ] );
    } elsif ( defined($r_idx) ) {

        # No merge needed: insert in front of the range that ended the scan.
        splice( @$list, $r_idx, 0, [ $rstart, $rend ] );
    } else {

        # The new range lies after everything registered so far.
        push( @$list, [ $rstart, $rend ] );
    }

    return \@gap_pairs;
}
# (end of check_and_register)
# overlap_size( $id, $start, $end )
#
# Returns how many positions of the range $start-$end fall inside the
# ranges stored for $id in $self->{'registry'}.  Returns 0 when
# $start > $end or when no ranges are stored for $id.
#
# Note: the per-ID list is created (as an empty list) if it does not
# exist yet.
sub overlap_size {
    my ( $self, $id, $start, $end ) = @_;

    return 0 if $start > $end;

    my $registry = $self->{'registry'};
    my $ranges   = ( $registry->{$id} ||= [] );
    return 0 unless scalar(@$ranges);

    # Binary search for the index at which the linear scan should begin.
    my ( $lo, $hi ) = ( 0, $#$ranges );
    until ( ( $hi - $lo ) <= 1 ) {
        my $mid = ( $lo + $hi ) >> 1;
        if ( $ranges->[$mid]->[1] < $start ) {
            $lo = $mid;
        } else {
            $hi = $mid;
        }
    }

    my $total = 0;
    foreach my $span ( @{$ranges}[ $lo .. $#$ranges ] ) {
        my ( $span_start, $span_end ) = @$span;

        # This stored range begins after the query ends: stop scanning.
        last if $span_start > $end;

        # One stored range covers the whole query: the answer is the
        # full query length, whatever was accumulated before.
        if ( $span_start <= $start && $span_end >= $end ) {
            $total = $end - $start + 1;
            last;
        }

        # Otherwise add the size of the intersection, if there is one.
        my $from = ( $start < $span_start ) ? $span_start : $start;
        my $to   = ( $end < $span_end )     ? $end        : $span_end;
        $total += ( $to - $from + 1 ) if ( $to - $from >= 0 );
    }

    return $total;
}
# (end of overlap_size)