Raw content of Bio::EnsEMBL::Utils::Slice =head1 LICENSE Copyright (c) 1999-2009 The European Bioinformatics Institute and Genome Research Limited. All rights reserved. This software is distributed under a modified Apache license. For license details, please see /info/about/code_licence.html =head1 CONTACT Please email comments or questions to the public Ensembl developers list at <ensembl-dev@ebi.ac.uk>. Questions may also be sent to the Ensembl help desk at <helpdesk@ensembl.org>. =cut =head1 NAME Bio::EnsEMBL::Utils::Slice - Utility functions for slices =head1 SYNOPSIS use Bio::EnsEMBL::Utils::Slice qw(split_Slices); # ... # get all chromosomes in the database my $slices = $slice_adaptor->fetch_all('chromosome'); # split the chromosomes into equal chunks of size less than 1MB # with an overlap of 1kb $slices = split_Slices( $slices, 1e6, 1e3 ); =head1 METHODS =cut package Bio::EnsEMBL::Utils::Slice; use strict; use warnings; use Exporter; use vars qw(@ISA @EXPORT_OK); @ISA = qw(Exporter); @EXPORT_OK = qw(&split_Slices); use Bio::EnsEMBL::Utils::Exception qw(throw); use POSIX; =head2 split_Slices Arg [1] : ref to list of slices Arg [2] : int maxlength of sub slices Arg [3] : int overlap length (optional) Example : my $sub_slices = split_Slices($slices,$maxlen,$overlap) Description: splits a slice into smaller slices Returntype : ref to list of slices Exceptions : maxlen <1 or overlap < 0 =cut sub split_Slices{ my ($slice_big,$max_length,$overlap)=@_; if(!defined($max_length) or $max_length < 1){ throw("maxlength needs to be set and > 0"); } if(!defined($overlap)){ $overlap = 0; } elsif($overlap < 0){ throw("negative overlaps not allowed"); } my @out=(); foreach my $slice (@$slice_big){ my $start = $slice->start; my $end; my $multiple; my $number; my $length = $slice->length; if($max_length && ($length > $overlap)) { #No seq region may be longer than max_length but we want to make #them all similar size so that the last one isn't much shorter. #Divide the seq_region into the largest equal pieces that are shorter #than max_length #calculate number of slices to create $number = ($length-$overlap) / ($max_length-$overlap); $number = ceil($number); #round up to int #calculate length of created slices $multiple = $length / $number; $multiple = floor($multiple); #round down to int } else { #just one slice of the whole seq_region $number = 1; $multiple = $length; } my $i; for(my $i=0; $i < $number; $i++) { $end = $start + $multiple + $overlap; #any remainder gets added to the last slice of the seq_region $end = $slice->end if($i == $number-1); push @out, Bio::EnsEMBL::Slice->new (-START => $start, -END => $end, -STRAND => 1, -SEQ_REGION_NAME => $slice->seq_region_name, -SEQ_REGION_LENGTH => $length, -COORD_SYSTEM => $slice->coord_system, -ADAPTOR => $slice->adaptor); $start += $multiple + 1; } } return \@out; } 1;