# Raw content of Bio::EnsEMBL::Analysis::Tools::Algorithms::IntronCluster

=head1 NAME

IntronCluster

=head1 SYNOPSIS

  my $cluster = Bio::EnsEMBL::Analysis::Tools::Algorithms::IntronCluster->new();
  $cluster->put_Introns([$intron], $transcript);
  my $introns = $cluster->get_Introns;

=head1 DESCRIPTION

This object holds one or more introns which have been clustered

=head1 CONTACT

ba1@sanger.ac.uk

=cut

# Let the code begin ...

package Bio::EnsEMBL::Analysis::Tools::Algorithms::IntronCluster;

use vars qw(@ISA);
use strict;
use warnings;

use Bio::EnsEMBL::Intron;
use Bio::EnsEMBL::Root;
use Bio::EnsEMBL::Utils::Exception qw(throw warning );

@ISA = qw(Bio::EnsEMBL::Root);

=head1 METHODS

=cut

=head2 new

  Arg[1]      : None
  Example     : my $newcluster = Bio::EnsEMBL::Analysis::Tools::Algorithms::IntronCluster->new() ;
  Description : Create new IntronCluster
  Return type : Bio::EnsEMBL::Analysis::Tools::Algorithms::IntronCluster
  Exceptions  : If arg passed in

=cut

sub new {
  my ($class, $whatever) = @_;

  # Allow $existing_cluster->new() as well as Class->new().
  $class = ref($class) if (ref($class));

  my $self = bless({}, $class);

  if ($whatever) {
    throw("Can't pass an object to new() method. Use put_Introns() to include Bio::EnsEMBL::Intron in cluster");
  }

  # Coordinates are computed lazily by start()/end()/strand() and cached here.
  $self->{_cached_start}  = undef;
  $self->{_cached_end}    = undef;
  $self->{_cached_strand} = undef;
  $self->{v}              = 0;    # verbosity

  return $self;
}

=head2 put_Introns

  Arg[1]      : arrayref of Bio::EnsEMBL::Intron
  Arg[2]      : Bio::EnsEMBL::Transcript
  Arg[3]      : (optional) Boolean, skip the strand consistency check
  Example     : $intron_cluster->put_Introns([$intron],$trans);
  Description : Add introns to the cluster. Each intron is filed under the
                set whose list of types contains the biotype of the
                transcript. $self->{'_types_sets'} (set name => arrayref of
                types) must have been populated beforehand; no setter for it
                is defined in this module. Every check is made before the
                cluster is modified, so a rejected intron leaves no trace.
  Return type : None
  Exceptions  : Type-sets not set, undefined intron, biotype not found in
                any set, or intron start/end/strand differing from the
                cached cluster start/end/strand

=cut

sub put_Introns {
  my ($self, $new_introns, $transcript, $ignore_strand) = @_;

  if (!defined($self->{'_types_sets'})) {
    throw("Cluster lacks references to intron-types, unable to put the intron");
  }

  foreach my $intron (@$new_introns) {
    throw("undef for intron") if (!$intron);

    my $intron_biotype = $transcript->biotype;
    my $set_name       = $self->_set_name_for_biotype($intron_biotype);
    if (!defined($set_name)) {
      throw("Failed putting intron of type " . $intron_biotype . "\n");
    }

    # Validate first, mutate second: a caller that traps the exception must
    # not be left with a half-registered intron.
    $self->_check_intron_matches_cache($intron, $ignore_strand);

    $self->_add_transcript_reference($intron, $transcript);
    push(@{ $self->{'_intron_sets'}{$set_name} }, $intron);
  }

  return;
}

# Return the name of the first set (in sorted name order, so the choice is
# reproducible) whose type list contains $biotype, or undef if there is none.
sub _set_name_for_biotype {
  my ($self, $biotype) = @_;

  foreach my $set_name (sort keys %{ $self->{'_types_sets'} }) {
    foreach my $type (@{ $self->{'_types_sets'}{$set_name} }) {
      return $set_name if ($biotype eq $type);
    }
  }

  return;
}

# Throw if $intron disagrees with any coordinate that has already been cached.
# Coordinates that have not been cached yet are not checked.
sub _check_intron_matches_cache {
  my ($self, $intron, $ignore_strand) = @_;

  if (defined($self->{_cached_start})
      && $intron->start != $self->{_cached_start}) {
    throw("Failed putting intron: start");
  }

  if (defined($self->{_cached_end})
      && $intron->end != $self->{_cached_end}) {
    throw("Failed putting intron: end");
  }

  if (!$ignore_strand
      && defined($self->{_cached_strand})
      && $intron->strand != $self->{_cached_strand}) {
    throw("Failed putting intron: strand");
  }

  return;
}

=head2 get_Introns

  Arg[1]      : None
  Example     : foreach my $intron (@{$self->get_Introns}) {
  Description : Gets all introns in an intron cluster, from every set
  Return type : arrayref of Bio::EnsEMBL::Intron

=cut

sub get_Introns {
  my ($self) = @_;

  my @introns;

  if (!defined($self->{'_intron_sets'})) {
    # warning() is an imported function, not a method: calling it through
    # $self would pass the object as the message text.
    warning("The intron array you try to retrieve is empty");
  }

  foreach my $set_name (keys %{ $self->{'_intron_sets'} }) {
    push(@introns, @{ $self->{'_intron_sets'}{$set_name} });
  }

  return \@introns;
}

=head2 strand

  Arg[1]      : None
  Example     : $strand = $intron_cluster->strand
  Description : Gets strand of intron cluster, taken from the strand of the
                exon preceding each intron. The result is cached.
  Return type : Int
  Exceptions  : If the introns are not all on the same strand

=cut

sub strand {
  my ($self) = @_;

  if (!defined($self->{_cached_strand})) {
    my @introns = @{ $self->get_Introns };

    unless (@introns) {
      warning("cannot retrieve the strand in a cluster with no introns");
    }

    my $strand;
    foreach my $intron (@introns) {
      # looking at only 1 should be enough
      my $this_strand = $intron->prev_Exon->strand;
      $strand = $this_strand if (!$strand);

      if ($this_strand != $strand) {
        throw("introns not on the same strand");
      }
    }

    $self->{_cached_strand} = $strand;
  }

  return $self->{_cached_strand};
}

=head2 get_Introns_by_Set

  Arg[1]      : String (set name)
  Example     : my @introns = @{$intron_cluster->get_Introns_by_Set($setname)};
  Description : Gets all introns in the intron cluster belonging to the set.
                An unknown set gives a warning and an empty list.
  Return type : arrayref of Bio::EnsEMBL::Intron
  Exceptions  : If no set name is given

=cut

sub get_Introns_by_Set {
  my ($self, $set) = @_;

  unless ($set) {
    throw("must provide a set");
  }

  my @selected_introns;

  if ($self->{v}) {
    for (keys %{ $self->{_intron_sets} }) {
      print " i know the following sets : $_\n";
    }
  }

  if (!defined($self->{'_intron_sets'}{$set})) {
    # throw("No introns of set name $set");
    warning("No introns of set name $set in cluster");
  }
  else {
    push @selected_introns, @{ $self->{'_intron_sets'}{$set} };
  }

  return \@selected_introns;
}

=head2 start

  Arg[1]      : None
  Example     : $start = $intron_cluster->start
  Description : Gets start position of intron cluster
                (The smallest number, regardless of strand).
                The result is cached.
  Return type : Int

=cut

sub start {
  my ($self) = @_;

  if (!defined($self->{_cached_start})) {
    my $start;

    foreach my $intron (@{ $self->get_Introns }) {
      my $this_start = $intron->start;

      # Test definedness, not truth, so a running minimum of 0 is kept.
      if (!defined($start) || $this_start < $start) {
        $start = $this_start;
      }
    }

    $self->{_cached_start} = $start;
  }

  return $self->{_cached_start};
}

=head2 end

  Arg[1]      : None
  Example     : $end = $intron_cluster->end
  Description : Gets end position of intron cluster
                (The largest number, regardless of strand).
                The result is cached.
  Return type : Int

=cut

sub end {
  my ($self) = @_;

  if (!defined($self->{_cached_end})) {
    my $end;

    foreach my $intron (@{ $self->get_Introns }) {
      my $this_end = $intron->end;

      # Test definedness, not truth, so a running maximum of 0 is kept.
      if (!defined($end) || $this_end > $end) {
        $end = $this_end;
      }
    }

    $self->{_cached_end} = $end;
  }

  return $self->{_cached_end};
}

=head2 get_transcripts_having_Intron_in_IntronCluster

  Arg[1]      : Bio::EnsEMBL::Intron
  Example     : @transcripts = @{$clust->get_transcripts_having_Intron_in_IntronCluster($intron)};
  Description : Gets all transcripts in the intron cluster that have an
                intron whose start and end equal the start and end of the
                cluster. NOTE: the intron passed as Arg[1] is currently not
                used in the comparison.
  Return type : Arrayref of Bio::EnsEMBL::Transcripts

=cut

sub get_transcripts_having_Intron_in_IntronCluster {
  my ($self, $intron) = @_;

  my @transcript_array;
  my %transhash = $self->each_transcripts_introns;

  TRANS:
  foreach my $trans_id (keys %transhash) {
    foreach my $intron_to_test (@{ $transhash{$trans_id}{'introns'} }) {
      #print STDERR "transcript $trans_id, intron start ".$intron_to_test->start.", intron end ".$intron_to_test->end.", self start ".$self->start.", self end ".$self->end."\n";
      if (   $intron_to_test->start == $self->start
          && $intron_to_test->end == $self->end) {
        push @transcript_array, $transhash{$trans_id}{'transcript'};
        # One matching intron is enough; report each transcript once.
        next TRANS;
      }
    }
  }

  return \@transcript_array;
}

=head2 each_transcripts_introns

  Arg[1]      : None
  Example     : my %transhash = $self->each_transcripts_introns;
  Description : Get a hash of introns keyed on transcript unique reference
                key (dbname_dbID). Each value is a hashref with the keys
                'introns' (arrayref) and 'transcript'. Empty if no
                transcript has been registered yet.
  Return type : Hash

=cut

sub each_transcripts_introns {
  my ($self) = @_;

  # Dereferencing undef as an rvalue is fatal under strict refs, so fall
  # back to an empty hash for a cluster without transcripts.
  return %{ $self->{_transcripthash} || {} };
}

=head2 _add_transcript_reference

  Arg[1]      : Bio::EnsEMBL::Intron
  Arg[2]      : Bio::EnsEMBL::Transcript
  Example     : $self->_add_transcript_reference($intron,$transcript);
  Description : Create an internal hash of introns keyed on transcript
                unique reference key
  Return type : None

=cut

sub _add_transcript_reference {
  my ($self, $intron, $transcript) = @_;

  my $key = $self->_transcript_key($transcript);

  #if there's not already a reference to transcript stored make an arrayref
  if (!$self->contains_transcript($transcript)) {
    $self->{_transcripthash}{$key}{'introns'}    = [];
    $self->{_transcripthash}{$key}{'transcript'} = $transcript;
  }

  # store introns of transcript (key: transcript)
  push @{ $self->{_transcripthash}{$key}{'introns'} }, $intron;

  return;
}

=head2 contains_transcript

  Arg[1]      : Bio::EnsEMBL::Transcript
  Example     : if (!$self->contains_transcript($transcript)) {
  Description : Looks to see if this transcript exists in the internal
                transcript/intron hash
  Return type : Boolean

=cut

sub contains_transcript {
  my ($self, $transcript) = @_;

  return (exists $self->{_transcripthash}{ $self->_transcript_key($transcript) });
}

# Build the key identifying a transcript in the internal hash:
# "<database name>_<dbID>". The transcript needs an adaptor for this.
sub _transcript_key {
  my ($self, $transcript) = @_;

  return $transcript->adaptor->dbc->dbname . "_" . $transcript->dbID;
}

1;