Raw content of Bio::EnsEMBL::AlignStrainSlice =head1 LICENSE Copyright (c) 1999-2009 The European Bioinformatics Institute and Genome Research Limited. All rights reserved. This software is distributed under a modified Apache license. For license details, please see /info/about/code_licence.html =head1 CONTACT Please email comments or questions to the public Ensembl developers list at <ensembl-dev@ebi.ac.uk>. Questions may also be sent to the Ensembl help desk at <helpdesk@ensembl.org>. =cut =head1 NAME Bio::EnsEMBL::AlignStrainSlice - Represents the slice of the genome aligned with certain strains (applying the variations/indels) =head1 SYNOPSIS $sa = $db->get_SliceAdaptor; $slice = $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 ); $strainSlice1 = $slice->get_by_Strain($strain_name1); $strainSlice2 = $slice->get_by_Strain($strain_name2); my @strainSlices; push @strainSlices, $strainSlice1; push @strainSlices, $strainSlice2; $alignSlice = Bio::EnsEMBL::AlignStrainSlice->new( -SLICE => $slice, -STRAINS => \@strainSlices ); # Get coordinates of variation in alignSlice my $alleleFeatures = $strainSlice1->get_all_AlleleFeature_Slice(); foreach my $af ( @{$alleleFeatures} ) { my $new_feature = $alignSlice->alignFeature( $af, $strainSlice1 ); print( "Coordinates of the feature in AlignSlice are: ", $new_feature->start, "-", $new_feature->end, "\n" ); } =head1 DESCRIPTION A AlignStrainSlice object represents a region of a genome align for certain strains. It can be used to align certain strains to a reference slice. =head1 METHODS =cut package Bio::EnsEMBL::AlignStrainSlice; use strict; use Bio::EnsEMBL::Utils::Argument qw(rearrange); use Bio::EnsEMBL::Mapper; use Bio::EnsEMBL::Mapper::RangeRegistry; use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); use Data::Dumper; =head2 new Arg[1] : Bio::EnsEMBL::Slice $Slice Arg[2] : listref of Bio::EnsEMBL::StrainSlice $strainSlice Example : push @strainSlices, $strainSlice1; push @strainSlices, $strainSlice2; ..... push @strainSlices, $strainSliceN; $alignStrainSlice = Bio::EnsEMBL::AlignStrainSlice->new(-SLICE => $slice, -STRAIN => \@strainSlices); Description : Creates a new Bio::EnsEMBL::AlignStrainSlice object that will contain a mapper between the Slice object, plus all the indels from the different Strains ReturnType : Bio::EnsEMBL::AlignStrainSlice Exceptions : none Caller : general =cut sub new{ my $caller = shift; my $class = ref($caller) || $caller; my ($slice, $strainSlices) = rearrange([qw(SLICE STRAINS)],@_); #check that both StrainSlice and Slice are identical (must have been defined in the same slice) foreach my $strainSlice (@{$strainSlices}){ if (($strainSlice->start != $slice->start) || ($strainSlice->end != $slice->end) || ($strainSlice->seq_region_name ne $slice->seq_region_name)){ warning("Not possible to create Align object from different Slices"); return []; } } return bless{'slice' => $slice, 'strains' => $strainSlices}, $class; } =head2 alignFeature Arg[1] : Bio::EnsEMBL::Feature $feature Arg[2] : Bio::EnsEMBL::StrainSlice $strainSlice Example : $new_feature = $alignSlice->alignFeature($feature, $strainSlice); Description : Creates a new Bio::EnsEMBL::Feature object that aligned to the AlignStrainSlice object. ReturnType : Bio::EnsEMBL::Feature Exceptions : none Caller : general =cut sub alignFeature{ my $self = shift; my $feature = shift; #check that the object is a Feature if (!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')){ throw("Bio::EnsEMBL::Feature object expected"); } #and align it to the AlignStrainSlice object my $mapper_strain = $self->mapper(); my @results; if ($feature->start > $feature->end){ #this is an Indel, map it with the special method @results = $mapper_strain->map_indel('Slice',$feature->start, $feature->end, $feature->strand,'Slice'); #and modify the coordinates according to the length of the indel $results[0]->end($results[0]->start + $feature->length_diff -1); } else{ @results = $mapper_strain->map_coordinates('Slice',$feature->start, $feature->end, $feature->strand,'Slice'); } #get need start and end of the new feature, aligned ot AlignStrainSlice my @results_ordered = sort {$a->start <=> $b->start} @results; my %new_feature = %$feature; #make a shallow copy of the Feature $new_feature{'start'}= $results_ordered[0]->start(); $new_feature{'end'} = $results_ordered[-1]->end(); #get last element of the array, the end of the slice return bless \%new_feature, ref($feature); } #getter for the mapper between the Slice and the different StrainSlice objects sub mapper{ my $self = shift; if (!defined $self->{'mapper'}){ #get the alleleFeatures in all the strains if (!defined $self->{'indels'}){ #when the list of indels is not defined, get them $self->{'indels'} = $self->_get_indels(); } my $indels = $self->{'indels'}; #gaps in reference slice my $mapper = Bio::EnsEMBL::Mapper->new('Slice', 'AlignStrainSlice'); my $start_slice = 1; my $end_slice; my $start_align = 1; my $end_align; my $length_indel = 0; my $length_acum_indel = 0; foreach my $indel (@{$indels}){ $end_slice = $indel->[0] - 1; $end_align = $indel->[0] - 1 + $length_acum_indel; #we must consider length previous indels $length_indel = $indel->[1] - $indel->[0] + 1; $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'AlignStrainSlice',$start_align,$end_align); $mapper->add_indel_coordinates('Slice',$end_slice + 1,$end_slice,1,'AlignStrainSlice',$end_align + 1,$end_align + $length_indel); $start_slice = $end_slice + 1; $start_align = $indel->[1] + 1 + $length_acum_indel; #we must consider legnth previous indels $length_acum_indel += $length_indel; } if ($start_slice <= $self->length){ $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'AlignStrainSlice',$start_align,$start_align + $self->length - $start_slice) } $self->{'mapper'} = $mapper; } return $self->{'mapper'}; } #returns the length of the AlignSlice: length of the Slice plus the gaps sub length{ my $self = shift; my $length; if (!defined $self->{'indels'}){ #when the list of indels is not defined, get them $self->{'indels'} = $self->_get_indels(); } $length = $self->{'slice'}->length; map {$length += ($_->[1] - $_->[0] + 1)} @{$self->{'indels'}}; return $length; } =head2 strains Args : None Description: Returns list with all strains used to define this AlignStrainSlice object Returntype : listref of Bio::EnsEMBL::StrainSlice objects Exceptions : none Caller : general =cut sub strains{ my $self = shift; return $self->{'strains'}; } =head2 Slice Args : None Description: Returns slice where the AlignStrainSlice is defined Returntype : Bio::EnsEMBL::Slice object Exceptions : none Caller : general =cut sub Slice{ my $self = shift; return $self->{'slice'}; } #method to retrieve, in order, a list with all the indels in the different strains sub _get_indels{ my $self = shift; #go throuh all the strains getting ONLY the indels (length_diff <> 0) my @indels; foreach my $strainSlice (@{$self->strains}){ my $differences = $strainSlice->get_all_AlleleFeatures_Slice(); #need to check there are differences.... foreach my $af (@{$differences}){ #if length is 0, but is a -, it is still a gap in the strain if (($af->length_diff != 0) || ($af->length_diff == 0 && $af->allele_string =~ /-/)){ push @indels, $af; } } } #need to overlap the gaps using the RangeRegistry module my $range_registry = Bio::EnsEMBL::Mapper::RangeRegistry->new(); foreach my $indel (@indels){ #in the reference and the strain there is a gap $range_registry->check_and_register(1,$indel->start,$indel->start) if ($indel->length_diff == 0); #deletion in reference slice $range_registry->check_and_register(1,$indel->start, $indel->end ) if ($indel->length_diff < 0); #insertion in reference slice $range_registry->check_and_register(1,$indel->start,$indel->start + $indel->length_diff - 1) if ($indel->length_diff > 0); } #and return all the gap coordinates.... return $range_registry->get_ranges(1); } =head2 get_all_Slices Args : none Description: This Slice is made of several Bio::EnsEMBL::StrainSlices sequence. This method returns these StrainSlices (or part of them) with the original coordinates Returntype : listref of Bio::EnsEMBL::StrainSlice objects Exceptions : end should be at least as big as start Caller : general =cut sub get_all_Slices { my $self = shift; my @strains; #add the reference strain my $dbVar = $self->Slice->adaptor->db->get_db_adaptor('variation'); unless($dbVar) { warning("Variation database must be attached to core database to " . "retrieve variation information" ); return ''; } my $indAdaptor = $dbVar->get_IndividualAdaptor(); my $ref_name = $indAdaptor->get_reference_strain_name; my $ref_strain = Bio::EnsEMBL::StrainSlice->new( -START => $self->Slice->{'start'}, -END => $self->Slice->{'end'}, -STRAND => $self->Slice->{'strand'}, -ADAPTOR => $self->Slice->{'adaptor'}, -SEQ => $self->Slice->{'seq'}, -SEQ_REGION_NAME => $self->Slice->{'seq_region_name'}, -SEQ_REGION_LENGTH => $self->Slice->{'seq_region_length'}, -COORD_SYSTEM => $self->Slice->{'coord_system'}, -STRAIN_NAME => $ref_name, ); #this is a fake reference alisce, should not contain any alleleFeature undef $ref_strain->{'alleleFeatures'}; push @strains, @{$self->strains}; my $new_feature; my $indel; my $aligned_features; my $indels = (); #reference to a hash containing indels in the different strains #we need to realign all Features in the different Slices and add '-' in the reference Slice foreach my $strain (@{$self->strains}){ foreach my $af (@{$strain->get_all_AlleleFeatures_Slice()}){ $new_feature = $self->alignFeature($af); #align feature in AlignSlice coordinates push @{$aligned_features},$new_feature if($new_feature->seq_region_start <= $strain->end); #some features might map outside slice if ($af->start != $af->end){ #an indel, need to add to the reference, and realign in the strain #make a shallow copy of the indel - clear it first! $indel = undef; %{$indel} = %{$new_feature}; bless $indel, ref($new_feature); $indel->allele_string('-'); push @{$indels},$indel; #and include in the list of potential indels } } next if (!defined $aligned_features); undef $strain->{'alleleFeatures'}; #remove all features before adding new aligned features push @{$strain->{'alleleFeatures'}}, @{$aligned_features}; undef $aligned_features; } push @strains, $ref_strain; #need to add indels in the different strains, if not present if (defined $indels){ foreach my $strain (@strains){ #inlcude the indels in the StrainSlice object push @{$strain->{'alignIndels'}},@{$indels}; } } return \@strains; } 1;